Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / timm /pr_2349 /en /changes.html

rtrm

about 2 months ago

download

raw

254 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Changelog","local":"changelog","sections":[{"title":"Aug 8, 2024","local":"aug-8-2024","sections":[],"depth":3},{"title":"July 28, 2024","local":"july-28-2024","sections":[],"depth":3},{"title":"July 26, 2024","local":"july-26-2024","sections":[],"depth":3},{"title":"June 24, 2024","local":"june-24-2024","sections":[],"depth":3},{"title":"June 12, 2024","local":"june-12-2024","sections":[],"depth":3},{"title":"May 14, 2024","local":"may-14-2024","sections":[],"depth":3},{"title":"May 11, 2024","local":"may-11-2024","sections":[],"depth":3},{"title":"April 11, 2024","local":"april-11-2024","sections":[],"depth":3},{"title":"Feb 19, 2024","local":"feb-19-2024","sections":[],"depth":3},{"title":"Jan 8, 2024","local":"jan-8-2024","sections":[],"depth":3},{"title":"Nov 23, 2023","local":"nov-23-2023","sections":[],"depth":3},{"title":"Nov 20, 2023","local":"nov-20-2023","sections":[],"depth":3},{"title":"Nov 3, 2023","local":"nov-3-2023","sections":[],"depth":3},{"title":"Oct 20, 2023","local":"oct-20-2023","sections":[],"depth":3},{"title":"Sep 1, 2023","local":"sep-1-2023","sections":[],"depth":3},{"title":"Aug 28, 2023","local":"aug-28-2023","sections":[],"depth":3},{"title":"Aug 25, 2023","local":"aug-25-2023","sections":[],"depth":3},{"title":"Aug 11, 2023","local":"aug-11-2023","sections":[],"depth":3},{"title":"Aug 3, 2023","local":"aug-3-2023","sections":[],"depth":3},{"title":"July 27, 2023","local":"july-27-2023","sections":[],"depth":3},{"title":"May 11, 2023","local":"may-11-2023","sections":[],"depth":3},{"title":"May 10, 2023","local":"may-10-2023","sections":[],"depth":3},{"title":"April 27, 2023","local":"april-27-2023","sections":[],"depth":3},{"title":"April 21, 2023","local":"april-21-2023","sections":[],"depth":3},{"title":"April 12, 2023","local":"april-12-2023","sections":[],"depth":3},{"title":"April 5, 2023","local":"april-5-2023","sections":[],"depth":3},{"title":"March 31, 
2023","local":"march-31-2023","sections":[],"depth":3},{"title":"March 22, 2023","local":"march-22-2023","sections":[],"depth":3},{"title":"Feb 26, 2023","local":"feb-26-2023","sections":[],"depth":3},{"title":"Feb 20, 2023","local":"feb-20-2023","sections":[],"depth":3},{"title":"Feb 16, 2023","local":"feb-16-2023","sections":[],"depth":3},{"title":"Feb 7, 2023","local":"feb-7-2023","sections":[],"depth":3},{"title":"Jan 20, 2023","local":"jan-20-2023","sections":[],"depth":3},{"title":"Jan 11, 2023","local":"jan-11-2023","sections":[],"depth":3},{"title":"Jan 6, 2023","local":"jan-6-2023","sections":[],"depth":3},{"title":"Jan 5, 2023","local":"jan-5-2023","sections":[],"depth":3},{"title":"Dec 23, 2022 🎄☃","local":"dec-23-2022-","sections":[],"depth":3},{"title":"Dec 8, 2022","local":"dec-8-2022","sections":[],"depth":3},{"title":"Dec 6, 2022","local":"dec-6-2022","sections":[],"depth":3},{"title":"Dec 5, 2022","local":"dec-5-2022","sections":[],"depth":3},{"title":"Oct 15, 2022","local":"oct-15-2022","sections":[],"depth":3},{"title":"Oct 10, 2022","local":"oct-10-2022","sections":[],"depth":3},{"title":"Sept 23, 2022","local":"sept-23-2022","sections":[],"depth":3},{"title":"Sept 7, 2022","local":"sept-7-2022","sections":[],"depth":3},{"title":"Aug 29, 2022","local":"aug-29-2022","sections":[],"depth":3},{"title":"Aug 26, 2022","local":"aug-26-2022","sections":[],"depth":3},{"title":"Aug 15, 2022","local":"aug-15-2022","sections":[],"depth":3},{"title":"Aug 5, 2022","local":"aug-5-2022","sections":[],"depth":3},{"title":"July 28, 2022","local":"july-28-2022","sections":[],"depth":3},{"title":"July 27, 2022","local":"july-27-2022","sections":[],"depth":3},{"title":"July 8, 2022","local":"july-8-2022","sections":[],"depth":3},{"title":"May 13, 2022","local":"may-13-2022","sections":[],"depth":3},{"title":"May 2, 2022","local":"may-2-2022","sections":[],"depth":3},{"title":"April 22, 2022","local":"april-22-2022","sections":[],"depth":3},{"title":"March 23, 
2022","local":"march-23-2022","sections":[],"depth":3},{"title":"March 21, 2022","local":"march-21-2022","sections":[],"depth":3},{"title":"Feb 2, 2022","local":"feb-2-2022","sections":[],"depth":3},{"title":"Jan 14, 2022","local":"jan-14-2022","sections":[],"depth":3},{"title":"Jan 5, 2023","local":"jan-5-2023","sections":[],"depth":3},{"title":"Dec 23, 2022 🎄☃","local":"dec-23-2022-","sections":[],"depth":3},{"title":"Dec 8, 2022","local":"dec-8-2022","sections":[],"depth":3},{"title":"Dec 6, 2022","local":"dec-6-2022","sections":[],"depth":3},{"title":"Dec 5, 2022","local":"dec-5-2022","sections":[],"depth":3},{"title":"Oct 15, 2022","local":"oct-15-2022","sections":[],"depth":3},{"title":"Oct 10, 2022","local":"oct-10-2022","sections":[],"depth":3},{"title":"Sept 23, 2022","local":"sept-23-2022","sections":[],"depth":3},{"title":"Sept 7, 2022","local":"sept-7-2022","sections":[],"depth":3},{"title":"Aug 29, 2022","local":"aug-29-2022","sections":[],"depth":3},{"title":"Aug 26, 2022","local":"aug-26-2022","sections":[],"depth":3},{"title":"Aug 15, 2022","local":"aug-15-2022","sections":[],"depth":3},{"title":"Aug 5, 2022","local":"aug-5-2022","sections":[],"depth":3},{"title":"July 28, 2022","local":"july-28-2022","sections":[],"depth":3},{"title":"July 27, 2022","local":"july-27-2022","sections":[],"depth":3},{"title":"July 8, 2022","local":"july-8-2022","sections":[],"depth":3},{"title":"May 13, 2022","local":"may-13-2022","sections":[],"depth":3},{"title":"May 2, 2022","local":"may-2-2022","sections":[],"depth":3},{"title":"April 22, 2022","local":"april-22-2022","sections":[],"depth":3},{"title":"March 23, 2022","local":"march-23-2022","sections":[],"depth":3},{"title":"March 21, 2022","local":"march-21-2022","sections":[],"depth":3},{"title":"Feb 2, 2022","local":"feb-2-2022","sections":[],"depth":3},{"title":"Jan 14, 2022","local":"jan-14-2022","sections":[],"depth":3}],"depth":1}">
	<link href="/docs/timm/pr_2349/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/entry/start.f2138890.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/scheduler.85c25b89.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/singletons.d0363996.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/paths.95674127.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/entry/app.226c8755.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/index.c9837788.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/nodes/0.dc2733c7.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/nodes/2.835e7241.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/CodeBlock.52fa569e.js">
	<link rel="modulepreload" href="/docs/timm/pr_2349/en/_app/immutable/chunks/EditOnGithub.b65eee75.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Changelog","local":"changelog","sections":[{"title":"Aug 8, 2024","local":"aug-8-2024","sections":[],"depth":3},{"title":"July 28, 2024","local":"july-28-2024","sections":[],"depth":3},{"title":"July 26, 2024","local":"july-26-2024","sections":[],"depth":3},{"title":"June 24, 2024","local":"june-24-2024","sections":[],"depth":3},{"title":"June 12, 2024","local":"june-12-2024","sections":[],"depth":3},{"title":"May 14, 2024","local":"may-14-2024","sections":[],"depth":3},{"title":"May 11, 2024","local":"may-11-2024","sections":[],"depth":3},{"title":"April 11, 2024","local":"april-11-2024","sections":[],"depth":3},{"title":"Feb 19, 2024","local":"feb-19-2024","sections":[],"depth":3},{"title":"Jan 8, 2024","local":"jan-8-2024","sections":[],"depth":3},{"title":"Nov 23, 2023","local":"nov-23-2023","sections":[],"depth":3},{"title":"Nov 20, 2023","local":"nov-20-2023","sections":[],"depth":3},{"title":"Nov 3, 2023","local":"nov-3-2023","sections":[],"depth":3},{"title":"Oct 20, 2023","local":"oct-20-2023","sections":[],"depth":3},{"title":"Sep 1, 2023","local":"sep-1-2023","sections":[],"depth":3},{"title":"Aug 28, 2023","local":"aug-28-2023","sections":[],"depth":3},{"title":"Aug 25, 2023","local":"aug-25-2023","sections":[],"depth":3},{"title":"Aug 11, 2023","local":"aug-11-2023","sections":[],"depth":3},{"title":"Aug 3, 2023","local":"aug-3-2023","sections":[],"depth":3},{"title":"July 27, 2023","local":"july-27-2023","sections":[],"depth":3},{"title":"May 11, 2023","local":"may-11-2023","sections":[],"depth":3},{"title":"May 10, 2023","local":"may-10-2023","sections":[],"depth":3},{"title":"April 27, 2023","local":"april-27-2023","sections":[],"depth":3},{"title":"April 21, 2023","local":"april-21-2023","sections":[],"depth":3},{"title":"April 12, 
2023","local":"april-12-2023","sections":[],"depth":3},{"title":"April 5, 2023","local":"april-5-2023","sections":[],"depth":3},{"title":"March 31, 2023","local":"march-31-2023","sections":[],"depth":3},{"title":"March 22, 2023","local":"march-22-2023","sections":[],"depth":3},{"title":"Feb 26, 2023","local":"feb-26-2023","sections":[],"depth":3},{"title":"Feb 20, 2023","local":"feb-20-2023","sections":[],"depth":3},{"title":"Feb 16, 2023","local":"feb-16-2023","sections":[],"depth":3},{"title":"Feb 7, 2023","local":"feb-7-2023","sections":[],"depth":3},{"title":"Jan 20, 2023","local":"jan-20-2023","sections":[],"depth":3},{"title":"Jan 11, 2023","local":"jan-11-2023","sections":[],"depth":3},{"title":"Jan 6, 2023","local":"jan-6-2023","sections":[],"depth":3},{"title":"Jan 5, 2023","local":"jan-5-2023","sections":[],"depth":3},{"title":"Dec 23, 2022 🎄☃","local":"dec-23-2022-","sections":[],"depth":3},{"title":"Dec 8, 2022","local":"dec-8-2022","sections":[],"depth":3},{"title":"Dec 6, 2022","local":"dec-6-2022","sections":[],"depth":3},{"title":"Dec 5, 2022","local":"dec-5-2022","sections":[],"depth":3},{"title":"Oct 15, 2022","local":"oct-15-2022","sections":[],"depth":3},{"title":"Oct 10, 2022","local":"oct-10-2022","sections":[],"depth":3},{"title":"Sept 23, 2022","local":"sept-23-2022","sections":[],"depth":3},{"title":"Sept 7, 2022","local":"sept-7-2022","sections":[],"depth":3},{"title":"Aug 29, 2022","local":"aug-29-2022","sections":[],"depth":3},{"title":"Aug 26, 2022","local":"aug-26-2022","sections":[],"depth":3},{"title":"Aug 15, 2022","local":"aug-15-2022","sections":[],"depth":3},{"title":"Aug 5, 2022","local":"aug-5-2022","sections":[],"depth":3},{"title":"July 28, 2022","local":"july-28-2022","sections":[],"depth":3},{"title":"July 27, 2022","local":"july-27-2022","sections":[],"depth":3},{"title":"July 8, 2022","local":"july-8-2022","sections":[],"depth":3},{"title":"May 13, 2022","local":"may-13-2022","sections":[],"depth":3},{"title":"May 2, 
2022","local":"may-2-2022","sections":[],"depth":3},{"title":"April 22, 2022","local":"april-22-2022","sections":[],"depth":3},{"title":"March 23, 2022","local":"march-23-2022","sections":[],"depth":3},{"title":"March 21, 2022","local":"march-21-2022","sections":[],"depth":3},{"title":"Feb 2, 2022","local":"feb-2-2022","sections":[],"depth":3},{"title":"Jan 14, 2022","local":"jan-14-2022","sections":[],"depth":3},{"title":"Jan 5, 2023","local":"jan-5-2023","sections":[],"depth":3},{"title":"Dec 23, 2022 🎄☃","local":"dec-23-2022-","sections":[],"depth":3},{"title":"Dec 8, 2022","local":"dec-8-2022","sections":[],"depth":3},{"title":"Dec 6, 2022","local":"dec-6-2022","sections":[],"depth":3},{"title":"Dec 5, 2022","local":"dec-5-2022","sections":[],"depth":3},{"title":"Oct 15, 2022","local":"oct-15-2022","sections":[],"depth":3},{"title":"Oct 10, 2022","local":"oct-10-2022","sections":[],"depth":3},{"title":"Sept 23, 2022","local":"sept-23-2022","sections":[],"depth":3},{"title":"Sept 7, 2022","local":"sept-7-2022","sections":[],"depth":3},{"title":"Aug 29, 2022","local":"aug-29-2022","sections":[],"depth":3},{"title":"Aug 26, 2022","local":"aug-26-2022","sections":[],"depth":3},{"title":"Aug 15, 2022","local":"aug-15-2022","sections":[],"depth":3},{"title":"Aug 5, 2022","local":"aug-5-2022","sections":[],"depth":3},{"title":"July 28, 2022","local":"july-28-2022","sections":[],"depth":3},{"title":"July 27, 2022","local":"july-27-2022","sections":[],"depth":3},{"title":"July 8, 2022","local":"july-8-2022","sections":[],"depth":3},{"title":"May 13, 2022","local":"may-13-2022","sections":[],"depth":3},{"title":"May 2, 2022","local":"may-2-2022","sections":[],"depth":3},{"title":"April 22, 2022","local":"april-22-2022","sections":[],"depth":3},{"title":"March 23, 2022","local":"march-23-2022","sections":[],"depth":3},{"title":"March 21, 2022","local":"march-21-2022","sections":[],"depth":3},{"title":"Feb 2, 
2022","local":"feb-2-2022","sections":[],"depth":3},{"title":"Jan 14, 2022","local":"jan-14-2022","sections":[],"depth":3}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="changelog" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#changelog"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Changelog</span></h1> <h3 class="relative group"><a id="aug-8-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-8-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 
0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 8, 2024</span></h3> <ul data-svelte-h="svelte-53ephs"><li>Add RDNet (‘DenseNets Reloaded’, <a href="https://arxiv.org/abs/2403.19588" rel="nofollow">https://arxiv.org/abs/2403.19588</a>), thanks <a href="https://github.com/dhkim0225" rel="nofollow">Donghyun Kim</a></li></ul> <h3 class="relative group"><a id="july-28-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-28-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 28, 2024</span></h3> <ul data-svelte-h="svelte-1u573m6"><li>Add <code>mobilenet_edgetpu_v2_m</code> weights w/ <code>ra4</code> mnv4-small based recipe. 
80.1% top-1 @ 224 and 80.7 @ 256.</li> <li>Release 1.0.8</li></ul> <h3 class="relative group"><a id="july-26-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-26-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 26, 2024</span></h3> <ul data-svelte-h="svelte-fjjvk3"><li>More MobileNet-v4 weights, ImageNet-12k pretrain w/ fine-tunes, and anti-aliased ConvLarge models</li></ul> <table data-svelte-h="svelte-1c1vli2"><thead><tr><th>model</th> <th>top1</th> <th>top1_err</th> <th>top5</th> <th>top5_err</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a href="http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k</a></td> <td>84.99</td> <td>15.01</td> <td>97.294</td> <td>2.706</td> <td>32.59</td> <td>544</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k</a></td> <td>84.772</td> <td>15.228</td> <td>97.344</td> <td>2.656</td> <td>32.59</td> <td>480</td></tr> <tr><td><a 
href="http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e230_r448_in12k_ft_in1k</a></td> <td>84.64</td> <td>15.36</td> <td>97.114</td> <td>2.886</td> <td>32.59</td> <td>448</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e230_r384_in12k_ft_in1k</a></td> <td>84.314</td> <td>15.686</td> <td>97.102</td> <td>2.898</td> <td>32.59</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_aa_large.e600_r384_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e600_r384_in1k</a></td> <td>83.824</td> <td>16.176</td> <td>96.734</td> <td>3.266</td> <td>32.59</td> <td>480</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_aa_large.e600_r384_in1k" rel="nofollow">mobilenetv4_conv_aa_large.e600_r384_in1k</a></td> <td>83.244</td> <td>16.756</td> <td>96.392</td> <td>3.608</td> <td>32.59</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k" rel="nofollow">mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k</a></td> <td>82.99</td> <td>17.01</td> <td>96.67</td> <td>3.33</td> <td>11.07</td> <td>320</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k" rel="nofollow">mobilenetv4_hybrid_medium.e200_r256_in12k_ft_in1k</a></td> <td>82.364</td> <td>17.636</td> <td>96.256</td> <td>3.744</td> <td>11.07</td> <td>256</td></tr></tbody></table> <ul data-svelte-h="svelte-t8ij7y"><li>Impressive MobileNet-V1 and EfficientNet-B0 baseline challenges (<a href="https://huggingface.co/blog/rwightman/mobilenet-baselines" rel="nofollow">https://huggingface.co/blog/rwightman/mobilenet-baselines</a>)</li></ul> <table data-svelte-h="svelte-1gmskbg"><thead><tr><th>model</th> <th>top1</th> <th>top1_err</th> <th>top5</th> <th>top5_err</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a 
href="http://hf.co/timm/efficientnet_b0.ra4_e3600_r224_in1k" rel="nofollow">efficientnet_b0.ra4_e3600_r224_in1k</a></td> <td>79.364</td> <td>20.636</td> <td>94.754</td> <td>5.246</td> <td>5.29</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/efficientnet_b0.ra4_e3600_r224_in1k" rel="nofollow">efficientnet_b0.ra4_e3600_r224_in1k</a></td> <td>78.584</td> <td>21.416</td> <td>94.338</td> <td>5.662</td> <td>5.29</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv1_100h.ra4_e3600_r224_in1k" rel="nofollow">mobilenetv1_100h.ra4_e3600_r224_in1k</a></td> <td>76.596</td> <td>23.404</td> <td>93.272</td> <td>6.728</td> <td>5.28</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv1_100.ra4_e3600_r224_in1k" rel="nofollow">mobilenetv1_100.ra4_e3600_r224_in1k</a></td> <td>76.094</td> <td>23.906</td> <td>93.004</td> <td>6.996</td> <td>4.23</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv1_100h.ra4_e3600_r224_in1k" rel="nofollow">mobilenetv1_100h.ra4_e3600_r224_in1k</a></td> <td>75.662</td> <td>24.338</td> <td>92.504</td> <td>7.496</td> <td>5.28</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv1_100.ra4_e3600_r224_in1k" rel="nofollow">mobilenetv1_100.ra4_e3600_r224_in1k</a></td> <td>75.382</td> <td>24.618</td> <td>92.312</td> <td>7.688</td> <td>4.23</td> <td>224</td></tr></tbody></table> <ul data-svelte-h="svelte-ypqji1"><li>Prototype of <code>set_input_size()</code> added to vit and swin v1/v2 models to allow changing image size, patch size, window size after model creation.</li> <li>Improved support in swin for different size handling, in addition to <code>set_input_size</code>, <code>always_partition</code> and <code>strict_img_size</code> args have been added to <code>__init__</code> to allow more flexible input size constraints</li> <li>Fix out of order indices info for intermediate ‘Getter’ feature wrapper, check out of range indices for same.</li> <li>Add several <code>tiny</code> &lt; .5M param 
models for testing that are actually trained on ImageNet-1k</li></ul> <table data-svelte-h="svelte-atnwzb"><thead><tr><th>model</th> <th>top1</th> <th>top1_err</th> <th>top5</th> <th>top5_err</th> <th>param_count</th> <th>img_size</th> <th>crop_pct</th></tr></thead> <tbody><tr><td>test_efficientnet.r160_in1k</td> <td>47.156</td> <td>52.844</td> <td>71.726</td> <td>28.274</td> <td>0.36</td> <td>192</td> <td>1.0</td></tr> <tr><td>test_byobnet.r160_in1k</td> <td>46.698</td> <td>53.302</td> <td>71.674</td> <td>28.326</td> <td>0.46</td> <td>192</td> <td>1.0</td></tr> <tr><td>test_efficientnet.r160_in1k</td> <td>46.426</td> <td>53.574</td> <td>70.928</td> <td>29.072</td> <td>0.36</td> <td>160</td> <td>0.875</td></tr> <tr><td>test_byobnet.r160_in1k</td> <td>45.378</td> <td>54.622</td> <td>70.572</td> <td>29.428</td> <td>0.46</td> <td>160</td> <td>0.875</td></tr> <tr><td>test_vit.r160_in1k</td> <td>42.0</td> <td>58.0</td> <td>68.664</td> <td>31.336</td> <td>0.37</td> <td>192</td> <td>1.0</td></tr> <tr><td>test_vit.r160_in1k</td> <td>40.822</td> <td>59.178</td> <td>67.212</td> <td>32.788</td> <td>0.37</td> <td>160</td> <td>0.875</td></tr></tbody></table> <ul data-svelte-h="svelte-q0wk82"><li>Fix vit reg token init, thanks <a href="https://github.com/Promisery" rel="nofollow">Promisery</a></li> <li>Other misc fixes</li></ul> <h3 class="relative group"><a id="june-24-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#june-24-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>June 24, 2024</span></h3> <ul data-svelte-h="svelte-11d05vq"><li>3 more MobileNetV4 hybrid weights with different MQA weight init scheme</li></ul> <table data-svelte-h="svelte-1nhomc3"><thead><tr><th>model</th> <th>top1</th> <th>top1_err</th> <th>top5</th> <th>top5_err</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_large.ix_e600_r384_in1k" rel="nofollow">mobilenetv4_hybrid_large.ix_e600_r384_in1k</a></td> <td>84.356</td> <td>15.644</td> <td>96.892</td> <td>3.108</td> <td>37.76</td> <td>448</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_large.ix_e600_r384_in1k" rel="nofollow">mobilenetv4_hybrid_large.ix_e600_r384_in1k</a></td> <td>83.990</td> <td>16.010</td> <td>96.702</td> <td>3.298</td> <td>37.76</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r384_in1k" rel="nofollow">mobilenetv4_hybrid_medium.ix_e550_r384_in1k</a></td> <td>83.394</td> <td>16.606</td> <td>96.760</td> <td>3.240</td> <td>11.07</td> <td>448</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r384_in1k" rel="nofollow">mobilenetv4_hybrid_medium.ix_e550_r384_in1k</a></td> <td>82.968</td> <td>17.032</td> <td>96.474</td> <td>3.526</td> <td>11.07</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r256_in1k" rel="nofollow">mobilenetv4_hybrid_medium.ix_e550_r256_in1k</a></td> <td>82.492</td> <td>17.508</td> <td>96.278</td> <td>3.722</td> <td>11.07</td> <td>320</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.ix_e550_r256_in1k" 
rel="nofollow">mobilenetv4_hybrid_medium.ix_e550_r256_in1k</a></td> <td>81.446</td> <td>18.554</td> <td>95.704</td> <td>4.296</td> <td>11.07</td> <td>256</td></tr></tbody></table> <ul data-svelte-h="svelte-1m4wkdr"><li>florence2 weight loading in DaViT model</li></ul> <h3 class="relative group"><a id="june-12-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#june-12-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>June 12, 2024</span></h3> <ul data-svelte-h="svelte-dr3wb8"><li>MobileNetV4 models and initial set of <code>timm</code> trained weights added:</li></ul> <table data-svelte-h="svelte-iydd9c"><thead><tr><th>model</th> <th>top1</th> <th>top1_err</th> <th>top5</th> <th>top5_err</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_large.e600_r384_in1k" rel="nofollow">mobilenetv4_hybrid_large.e600_r384_in1k</a></td> <td>84.266</td> <td>15.734</td> <td>96.936</td> <td>3.064</td> <td>37.76</td> <td>448</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_large.e600_r384_in1k" rel="nofollow">mobilenetv4_hybrid_large.e600_r384_in1k</a></td> 
<td>83.800</td> <td>16.200</td> <td>96.770</td> <td>3.230</td> <td>37.76</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_large.e600_r384_in1k" rel="nofollow">mobilenetv4_conv_large.e600_r384_in1k</a></td> <td>83.392</td> <td>16.608</td> <td>96.622</td> <td>3.378</td> <td>32.59</td> <td>448</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_large.e600_r384_in1k" rel="nofollow">mobilenetv4_conv_large.e600_r384_in1k</a></td> <td>82.952</td> <td>17.048</td> <td>96.266</td> <td>3.734</td> <td>32.59</td> <td>384</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_large.e500_r256_in1k" rel="nofollow">mobilenetv4_conv_large.e500_r256_in1k</a></td> <td>82.674</td> <td>17.326</td> <td>96.31</td> <td>3.69</td> <td>32.59</td> <td>320</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_large.e500_r256_in1k" rel="nofollow">mobilenetv4_conv_large.e500_r256_in1k</a></td> <td>81.862</td> <td>18.138</td> <td>95.69</td> <td>4.31</td> <td>32.59</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_hybrid_medium.e500_r224_in1k</a></td> <td>81.276</td> <td>18.724</td> <td>95.742</td> <td>4.258</td> <td>11.07</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_medium.e500_r256_in1k" rel="nofollow">mobilenetv4_conv_medium.e500_r256_in1k</a></td> <td>80.858</td> <td>19.142</td> <td>95.768</td> <td>4.232</td> <td>9.72</td> <td>320</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_hybrid_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_hybrid_medium.e500_r224_in1k</a></td> <td>80.442</td> <td>19.558</td> <td>95.38</td> <td>4.62</td> <td>11.07</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_blur_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_conv_blur_medium.e500_r224_in1k</a></td> <td>80.142</td> <td>19.858</td> <td>95.298</td> <td>4.702</td> <td>9.72</td> <td>256</td></tr> <tr><td><a 
href="http://hf.co/timm/mobilenetv4_conv_medium.e500_r256_in1k" rel="nofollow">mobilenetv4_conv_medium.e500_r256_in1k</a></td> <td>79.928</td> <td>20.072</td> <td>95.184</td> <td>4.816</td> <td>9.72</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_conv_medium.e500_r224_in1k</a></td> <td>79.808</td> <td>20.192</td> <td>95.186</td> <td>4.814</td> <td>9.72</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_blur_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_conv_blur_medium.e500_r224_in1k</a></td> <td>79.438</td> <td>20.562</td> <td>94.932</td> <td>5.068</td> <td>9.72</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_medium.e500_r224_in1k" rel="nofollow">mobilenetv4_conv_medium.e500_r224_in1k</a></td> <td>79.094</td> <td>20.906</td> <td>94.77</td> <td>5.23</td> <td>9.72</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_small.e2400_r224_in1k" rel="nofollow">mobilenetv4_conv_small.e2400_r224_in1k</a></td> <td>74.616</td> <td>25.384</td> <td>92.072</td> <td>7.928</td> <td>3.77</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_small.e1200_r224_in1k" rel="nofollow">mobilenetv4_conv_small.e1200_r224_in1k</a></td> <td>74.292</td> <td>25.708</td> <td>92.116</td> <td>7.884</td> <td>3.77</td> <td>256</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_small.e2400_r224_in1k" rel="nofollow">mobilenetv4_conv_small.e2400_r224_in1k</a></td> <td>73.756</td> <td>26.244</td> <td>91.422</td> <td>8.578</td> <td>3.77</td> <td>224</td></tr> <tr><td><a href="http://hf.co/timm/mobilenetv4_conv_small.e1200_r224_in1k" rel="nofollow">mobilenetv4_conv_small.e1200_r224_in1k</a></td> <td>73.454</td> <td>26.546</td> <td>91.34</td> <td>8.66</td> <td>3.77</td> <td>224</td></tr></tbody></table> <ul data-svelte-h="svelte-1niwir6"><li>Apple MobileCLIP (<a href="https://arxiv.org/pdf/2311.17049" 
rel="nofollow">https://arxiv.org/pdf/2311.17049</a>, FastViT and ViT-B) image tower model support & weights added (part of OpenCLIP support).</li> <li>ViTamin (<a href="https://arxiv.org/abs/2404.02132" rel="nofollow">https://arxiv.org/abs/2404.02132</a>) CLIP image tower model & weights added (part of OpenCLIP support).</li> <li>OpenAI CLIP Modified ResNet image tower modelling & weight support (via ByobNet). Refactor AttentionPool2d.</li></ul> <h3 class="relative group"><a id="may-14-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-14-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 14, 2024</span></h3> <ul data-svelte-h="svelte-1llm05f"><li>Support loading PaliGemma jax weights into SigLIP ViT models with average pooling.</li> <li>Add Hiera models from Meta (<a href="https://github.com/facebookresearch/hiera" rel="nofollow">https://github.com/facebookresearch/hiera</a>).</li> <li>Add <code>normalize=</code> flag for transforms, return non-normalized torch.Tensor with original dtype (for <code>chug</code>)</li> <li>Version 1.0.3 release</li></ul> <h3 class="relative group"><a id="may-11-2024" 
class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-11-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 11, 2024</span></h3> <ul data-svelte-h="svelte-6dlbg0"><li><code>Searching for Better ViT Baselines (For the GPU Poor)</code> weights and vit variants released. 
Exploring model shapes between Tiny and Base.</li></ul> <table data-svelte-h="svelte-13u9k9c"><thead><tr><th>model</th> <th>top1</th> <th>top5</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a href="https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1k" rel="nofollow">vit_mediumd_patch16_reg4_gap_256.sbb_in12k_ft_in1k</a></td> <td>86.202</td> <td>97.874</td> <td>64.11</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1k" rel="nofollow">vit_betwixt_patch16_reg4_gap_256.sbb_in12k_ft_in1k</a></td> <td>85.418</td> <td>97.48</td> <td>60.4</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k" rel="nofollow">vit_mediumd_patch16_rope_reg1_gap_256.sbb_in1k</a></td> <td>84.322</td> <td>96.812</td> <td>63.95</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k" rel="nofollow">vit_betwixt_patch16_rope_reg4_gap_256.sbb_in1k</a></td> <td>83.906</td> <td>96.684</td> <td>60.23</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_base_patch16_rope_reg1_gap_256.sbb_in1k" rel="nofollow">vit_base_patch16_rope_reg1_gap_256.sbb_in1k</a></td> <td>83.866</td> <td>96.67</td> <td>86.43</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_medium_patch16_rope_reg1_gap_256.sbb_in1k" rel="nofollow">vit_medium_patch16_rope_reg1_gap_256.sbb_in1k</a></td> <td>83.81</td> <td>96.824</td> <td>38.74</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in1k" rel="nofollow">vit_betwixt_patch16_reg4_gap_256.sbb_in1k</a></td> <td>83.706</td> <td>96.616</td> <td>60.4</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_betwixt_patch16_reg1_gap_256.sbb_in1k" rel="nofollow">vit_betwixt_patch16_reg1_gap_256.sbb_in1k</a></td> <td>83.628</td> <td>96.544</td> 
<td>60.4</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_medium_patch16_reg4_gap_256.sbb_in1k" rel="nofollow">vit_medium_patch16_reg4_gap_256.sbb_in1k</a></td> <td>83.47</td> <td>96.622</td> <td>38.88</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_medium_patch16_reg1_gap_256.sbb_in1k" rel="nofollow">vit_medium_patch16_reg1_gap_256.sbb_in1k</a></td> <td>83.462</td> <td>96.548</td> <td>38.88</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_little_patch16_reg4_gap_256.sbb_in1k" rel="nofollow">vit_little_patch16_reg4_gap_256.sbb_in1k</a></td> <td>82.514</td> <td>96.262</td> <td>22.52</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_wee_patch16_reg1_gap_256.sbb_in1k" rel="nofollow">vit_wee_patch16_reg1_gap_256.sbb_in1k</a></td> <td>80.256</td> <td>95.360</td> <td>13.42</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_pwee_patch16_reg1_gap_256.sbb_in1k" rel="nofollow">vit_pwee_patch16_reg1_gap_256.sbb_in1k</a></td> <td>80.072</td> <td>95.136</td> <td>15.25</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_mediumd_patch16_reg4_gap_256.sbb_in12k" rel="nofollow">vit_mediumd_patch16_reg4_gap_256.sbb_in12k</a></td> <td>N/A</td> <td>N/A</td> <td>64.11</td> <td>256</td></tr> <tr><td><a href="https://huggingface.co/timm/vit_betwixt_patch16_reg4_gap_256.sbb_in12k" rel="nofollow">vit_betwixt_patch16_reg4_gap_256.sbb_in12k</a></td> <td>N/A</td> <td>N/A</td> <td>60.4</td> <td>256</td></tr></tbody></table> <ul data-svelte-h="svelte-10hxznx"><li>AttentionExtract helper added to extract attention maps from <code>timm</code> models. 
See example in <a href="https://github.com/huggingface/pytorch-image-models/discussions/1232#discussioncomment-9320949" rel="nofollow">https://github.com/huggingface/pytorch-image-models/discussions/1232#discussioncomment-9320949</a></li> <li><code>forward_intermediates()</code> API refined and added to more models including some ConvNets that have other extraction methods.</li> <li>1017 of 1047 model architectures support <code>features_only=True</code> feature extraction. Remaining 34 architectures can be supported but based on priority requests.</li> <li>Remove torch.jit.script annotated functions including old JIT activations. Conflict with dynamo and dynamo does a much better job when used.</li></ul> <h3 class="relative group"><a id="april-11-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-11-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 11, 2024</span></h3> <ul data-svelte-h="svelte-k3yei1"><li>Prepping for a long overdue 1.0 release, things have been stable for a while now.</li> <li>Significant feature that’s been missing for a while, <code>features_only=True</code> support for ViT models 
with flat hidden states or non-std module layouts (so far covering <code>'vit_', 'twins_', 'deit', 'beit', 'mvitv2', 'eva', 'samvit_', 'flexivit'</code>)</li> <li>Above feature support achieved through a new <code>forward_intermediates()</code> API that can be used with a feature wrapping module or directly.</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = timm.create_model(<span class="hljs-string">'vit_base_patch16_224'</span>)
	final_feat, intermediates = model.forward_intermediates(<span class="hljs-built_in">input</span>)
	output = model.forward_head(final_feat) <span class="hljs-comment"># pooling + classifier head</span>

	<span class="hljs-built_in">print</span>(final_feat.shape)
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">197</span>, <span class="hljs-number">768</span>])

	<span class="hljs-keyword">for</span> f <span class="hljs-keyword">in</span> intermediates:
	    <span class="hljs-built_in">print</span>(f.shape)
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">14</span>, <span class="hljs-number">14</span>])

	<span class="hljs-built_in">print</span>(output.shape)
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">1000</span>])<!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = timm.create_model(<span class="hljs-string">'eva02_base_patch16_clip_224'</span>, pretrained=<span class="hljs-literal">True</span>, img_size=<span class="hljs-number">512</span>, features_only=<span class="hljs-literal">True</span>, out_indices=(-<span class="hljs-number">3</span>, -<span class="hljs-number">2</span>,))
	output = model(torch.randn(<span class="hljs-number">2</span>, <span class="hljs-number">3</span>, <span class="hljs-number">512</span>, <span class="hljs-number">512</span>))

	<span class="hljs-keyword">for</span> o <span class="hljs-keyword">in</span> output:
	    <span class="hljs-built_in">print</span>(o.shape)
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">32</span>, <span class="hljs-number">32</span>])
	torch.Size([<span class="hljs-number">2</span>, <span class="hljs-number">768</span>, <span class="hljs-number">32</span>, <span class="hljs-number">32</span>])<!-- HTML_TAG_END --></pre></div> <ul data-svelte-h="svelte-qmozhb"><li>TinyCLIP vision tower weights added, thx <a href="https://github.com/gau-nernst" rel="nofollow">Thien Tran</a></li></ul> <h3 class="relative group"><a id="feb-19-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-19-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 19, 2024</span></h3> <ul data-svelte-h="svelte-1hnhcdw"><li>Next-ViT models added. Adapted from <a href="https://github.com/bytedance/Next-ViT" rel="nofollow">https://github.com/bytedance/Next-ViT</a></li> <li>HGNet and PP-HGNetV2 models added. 
Adapted from <a href="https://github.com/PaddlePaddle/PaddleClas" rel="nofollow">https://github.com/PaddlePaddle/PaddleClas</a> by <a href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>Removed setup.py, moved to pyproject.toml based build supported by PDM</li> <li>Add updated model EMA impl using _for_each for less overhead</li> <li>Support device args in train script for non GPU devices</li> <li>Other misc fixes and small additions</li> <li>Min supported Python version increased to 3.8</li> <li>Release 0.9.16</li></ul> <h3 class="relative group"><a id="jan-8-2024" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-8-2024"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 8, 2024</span></h3> <p data-svelte-h="svelte-wgao7f">Datasets & transform refactoring</p> <ul data-svelte-h="svelte-19bcgju"><li>HuggingFace streaming (iterable) dataset support (<code>--dataset hfids:org/dataset</code>)</li> <li>Webdataset wrapper tweaks for improved split info fetching, can auto fetch splits from supported HF hub webdataset</li> <li>Tested HF <code>datasets</code> and webdataset wrapper streaming from HF hub with 
recent <code>timm</code> ImageNet uploads to <a href="https://huggingface.co/timm" rel="nofollow">https://huggingface.co/timm</a></li> <li>Make input & target column/field keys consistent across datasets and pass via args</li> <li>Full monochrome support when using e.g. <code>--input-size 1 224 224</code> or <code>--in-chans 1</code>, sets PIL image conversion appropriately in dataset</li> <li>Improved several alternate crop & resize transforms (ResizeKeepRatio, RandomCropOrPad, etc) for use in PixParse document AI project</li> <li>Add SimCLR style color jitter prob along with grayscale and gaussian blur options to augmentations and args</li> <li>Allow train without validation set (<code>--val-split ''</code>) in train script</li> <li>Add <code>--bce-sum</code> (sum over class dim) and <code>--bce-pos-weight</code> (positive weighting) args for training as they’re common BCE loss tweaks I was often hard coding</li></ul> <h3 class="relative group"><a id="nov-23-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nov-23-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Nov 23, 2023</span></h3> <ul 
data-svelte-h="svelte-10hvae7"><li>Added EfficientViT-Large models, thanks <a href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>Fix Python 3.7 compat, will be dropping support for it soon</li> <li>Other misc fixes</li> <li>Release 0.9.12</li></ul> <h3 class="relative group"><a id="nov-20-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nov-20-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Nov 20, 2023</span></h3> <ul data-svelte-h="svelte-1xmj8wz"><li>Added significant flexibility for Hugging Face Hub based timm models via <code>model_args</code> config entry. 
<code>model_args</code> will be passed as kwargs through to models on creation.<ul><li>See example at <a href="https://huggingface.co/gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k/blob/main/config.json" rel="nofollow">https://huggingface.co/gaunernst/vit_base_patch16_1024_128.audiomae_as2m_ft_as20k/blob/main/config.json</a></li> <li>Usage: <a href="https://github.com/huggingface/pytorch-image-models/discussions/2035" rel="nofollow">https://github.com/huggingface/pytorch-image-models/discussions/2035</a></li></ul></li> <li>Updated imagenet eval and test set csv files with latest models</li> <li><code>vision_transformer.py</code> typing and doc cleanup by <a href="https://github.com/Laurent2916" rel="nofollow">Laureηt</a></li> <li>0.9.11 release</li></ul> <h3 class="relative group"><a id="nov-3-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#nov-3-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Nov 3, 2023</span></h3> <ul data-svelte-h="svelte-1g84gdl"><li><a href="https://huggingface.co/papers/2309.17425" rel="nofollow">DFN (Data Filtering Networks)</a> and <a 
href="https://huggingface.co/papers/2309.16671" rel="nofollow">MetaCLIP</a> ViT weights added</li> <li>DINOv2 ‘register’ ViT model weights added (<a href="https://huggingface.co/papers/2309.16588" rel="nofollow">https://huggingface.co/papers/2309.16588</a>, <a href="https://huggingface.co/papers/2304.07193" rel="nofollow">https://huggingface.co/papers/2304.07193</a>)</li> <li>Add <code>quickgelu</code> ViT variants for OpenAI, DFN, MetaCLIP weights that use it (less efficient)</li> <li>Improved typing added to ResNet, MobileNet-v3 thanks to <a href="https://github.com/a-r-r-o-w" rel="nofollow">Aryan</a></li> <li>ImageNet-12k fine-tuned (from LAION-2B CLIP) <code>convnext_xxlarge</code></li> <li>0.9.9 release</li></ul> <h3 class="relative group"><a id="oct-20-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#oct-20-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Oct 20, 2023</span></h3> <ul data-svelte-h="svelte-1qzyk27"><li><a href="https://huggingface.co/papers/2303.15343" rel="nofollow">SigLIP</a> image tower weights supported in <code>vision_transformer.py</code>.<ul><li>Great potential for fine-tune and 
downstream feature use.</li></ul></li> <li>Experimental ‘register’ support in vit models as per <a href="https://huggingface.co/papers/2309.16588" rel="nofollow">Vision Transformers Need Registers</a></li> <li>Updated RepViT with new weight release. Thanks <a href="https://github.com/jameslahm" rel="nofollow">wangao</a></li> <li>Add patch resizing support (on pretrained weight load) to Swin models</li> <li>0.9.8 release pending</li></ul> <h3 class="relative group"><a id="sep-1-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sep-1-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sep 1, 2023</span></h3> <ul data-svelte-h="svelte-1dse2f8"><li>TinyViT added by <a href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>Fix EfficientViT (MIT) to use torch.autocast so it works back to PT 1.10</li> <li>0.9.7 release</li></ul> <h3 class="relative group"><a id="aug-28-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-28-2023"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 28, 2023</span></h3> <ul data-svelte-h="svelte-5b033t"><li>Add dynamic img size support to models in <code>vision_transformer.py</code>, <code>vision_transformer_hybrid.py</code>, <code>deit.py</code>, and <code>eva.py</code> w/o breaking backward compat.<ul><li>Add <code>dynamic_img_size=True</code> to args at model creation time to allow changing the grid size (interpolate abs and/or ROPE pos embed each forward pass).</li> <li>Add <code>dynamic_img_pad=True</code> to allow image sizes that aren’t divisible by patch size (pad bottom right to patch size each forward pass).</li> <li>Enabling either dynamic mode will break FX tracing unless PatchEmbed module added as leaf.</li> <li>Existing method of resizing position embedding by passing different <code>img_size</code> (interpolate pretrained embed weights once) on creation still works.</li> <li>Existing method of changing <code>patch_size</code> (resize pretrained patch_embed weights once) on creation still works.</li> <li>Example validation cmd <code>python validate.py --data-dir /imagenet --model vit_base_patch16_224 --amp --amp-dtype bfloat16 --img-size 255 --crop-pct 1.0 --model-kwargs dynamic_img_size=True dynamic_img_pad=True</code></li></ul></li></ul> <h3 
class="relative group"><a id="aug-25-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-25-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 25, 2023</span></h3> <ul data-svelte-h="svelte-1spql0o"><li>Many new models since last release<ul><li>FastViT - <a href="https://arxiv.org/abs/2303.14189" rel="nofollow">https://arxiv.org/abs/2303.14189</a></li> <li>MobileOne - <a href="https://arxiv.org/abs/2206.04040" rel="nofollow">https://arxiv.org/abs/2206.04040</a></li> <li>InceptionNeXt - <a href="https://arxiv.org/abs/2303.16900" rel="nofollow">https://arxiv.org/abs/2303.16900</a></li> <li>RepGhostNet - <a href="https://arxiv.org/abs/2211.06088" rel="nofollow">https://arxiv.org/abs/2211.06088</a> (thanks <a href="https://github.com/ChengpengChen" rel="nofollow">https://github.com/ChengpengChen</a>)</li> <li>GhostNetV2 - <a href="https://arxiv.org/abs/2211.12905" rel="nofollow">https://arxiv.org/abs/2211.12905</a> (thanks <a href="https://github.com/yehuitang" rel="nofollow">https://github.com/yehuitang</a>)</li> <li>EfficientViT (MSRA) - <a href="https://arxiv.org/abs/2305.07027" 
rel="nofollow">https://arxiv.org/abs/2305.07027</a> (thanks <a href="https://github.com/seefun" rel="nofollow">https://github.com/seefun</a>)</li> <li>EfficientViT (MIT) - <a href="https://arxiv.org/abs/2205.14756" rel="nofollow">https://arxiv.org/abs/2205.14756</a> (thanks <a href="https://github.com/seefun" rel="nofollow">https://github.com/seefun</a>)</li></ul></li> <li>Add <code>--reparam</code> arg to <code>benchmark.py</code>, <code>onnx_export.py</code>, and <code>validate.py</code> to trigger layer reparameterization / fusion for models with any one of <code>reparameterize()</code>, <code>switch_to_deploy()</code> or <code>fuse()</code><ul><li>Including FastViT, MobileOne, RepGhostNet, EfficientViT (MSRA), RepViT, RepVGG, and LeViT</li></ul></li> <li>Preparing 0.9.6 ‘back to school’ release</li></ul> <h3 class="relative group"><a id="aug-11-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-11-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 11, 2023</span></h3> <ul data-svelte-h="svelte-1492ay2"><li>Swin, MaxViT, CoAtNet, and BEiT models support resizing of image/window size on creation with 
adaptation of pretrained weights</li> <li>Example validation cmd to test w/ non-square resize <code>python validate.py --data-dir /imagenet --model swin_base_patch4_window7_224.ms_in22k_ft_in1k --amp --amp-dtype bfloat16 --input-size 3 256 320 --model-kwargs window_size=8,10 img_size=256,320</code></li></ul> <h3 class="relative group"><a id="aug-3-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-3-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 3, 2023</span></h3> <ul data-svelte-h="svelte-1emosi9"><li>Add GluonCV weights for HRNet w18_small and w18_small_v2. 
Converted by <a href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>Fix <code>selecsls</code> model naming regression</li> <li>Patch and position embedding for ViT/EVA works for bfloat16/float16 weights on load (or activations for on-the-fly resize)</li> <li>v0.9.5 release prep</li></ul> <h3 class="relative group"><a id="july-27-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-27-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 27, 2023</span></h3> <ul data-svelte-h="svelte-zhgqqt"><li>Added timm trained <code>seresnextaa201d_32x8d.sw_in12k_ft_in1k_384</code> weights (and <code>.sw_in12k</code> pretrain) with 87.3% top-1 on ImageNet-1k, best ImageNet ResNet family model I’m aware of.</li> <li>RepViT model and weights (<a href="https://arxiv.org/abs/2307.09283" rel="nofollow">https://arxiv.org/abs/2307.09283</a>) added by <a href="https://github.com/jameslahm" rel="nofollow">wangao</a></li> <li>I-JEPA ViT feature weights (no classifier) added by <a href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>SAM-ViT (segment anything) feature weights (no classifier) added by <a 
href="https://github.com/seefun" rel="nofollow">SeeFun</a></li> <li>Add support for alternative feat extraction methods and -ve indices to EfficientNet</li> <li>Add NAdamW optimizer</li> <li>Misc fixes</li></ul> <h3 class="relative group"><a id="may-11-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-11-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 11, 2023</span></h3> <ul data-svelte-h="svelte-gifogx"><li><code>timm</code> 0.9 released, transition from 0.8.xdev releases</li></ul> <h3 class="relative group"><a id="may-10-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-10-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 
1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 10, 2023</span></h3> <ul data-svelte-h="svelte-18h01cj"><li>Hugging Face Hub downloading is now default, 1132 models on <a href="https://huggingface.co/timm" rel="nofollow">https://huggingface.co/timm</a>, 1163 weights in <code>timm</code></li> <li>DINOv2 vit feature backbone weights added thanks to <a href="https://github.com/leng-yue" rel="nofollow">Leng Yue</a></li> <li>FB MAE vit feature backbone weights added</li> <li>OpenCLIP DataComp-XL L/14 feat backbone weights added</li> <li>MetaFormer (poolformer-v2, caformer, convformer, updated poolformer (v1)) w/ weights added by <a href="https://github.com/fffffgggg54" rel="nofollow">Fredo Guan</a></li> <li>Experimental <code>get_intermediate_layers</code> function on vit/deit models for grabbing hidden states (inspired by DINO impl). 
This is WIP and may change significantly… feedback welcome.</li> <li>Model creation throws error if <code>pretrained=True</code> and no weights exist (instead of continuing with random initialization)</li> <li>Fix regression with inception / nasnet TF sourced weights with 1001 classes in original classifiers</li> <li>bitsandbytes (<a href="https://github.com/TimDettmers/bitsandbytes" rel="nofollow">https://github.com/TimDettmers/bitsandbytes</a>) optimizers added to factory, use <code>bnb</code> prefix, ie <code>bnbadam8bit</code></li> <li>Misc cleanup and fixes</li> <li>Final testing before switching to a 0.9 and bringing <code>timm</code> out of pre-release state</li></ul> <h3 class="relative group"><a id="april-27-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-27-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 27, 2023</span></h3> <ul data-svelte-h="svelte-cwpusb"><li>97% of <code>timm</code> models uploaded to HF Hub and almost all updated to support multi-weight pretrained configs</li> <li>Minor cleanup and refactoring of another batch of models as multi-weight added. 
More fused_attn (F.sdpa) and features_only support, and torchscript fixes.</li></ul> <h3 class="relative group"><a id="april-21-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-21-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 21, 2023</span></h3> <ul data-svelte-h="svelte-nnrgzx"><li>Gradient accumulation support added to train script and tested (<code>--grad-accum-steps</code>), thanks <a href="https://github.com/voidbag" rel="nofollow">Taeksang Kim</a></li> <li>More weights on HF Hub (cspnet, cait, volo, xcit, tresnet, hardcorenas, densenet, dpn, vovnet, xception_aligned)</li> <li>Added <code>--head-init-scale</code> and <code>--head-init-bias</code> to train.py to scale classifier head and set fixed bias for fine-tune</li> <li>Remove all InplaceABN (<code>inplace_abn</code>) use, replaced use in tresnet with standard BatchNorm (modified weights accordingly).</li></ul> <h3 class="relative group"><a id="april-12-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#april-12-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 12, 2023</span></h3> <ul data-svelte-h="svelte-1n3jsdn"><li>Add ONNX export script, validate script, helpers that I’ve had kicking around for a long time. Tweak ‘same’ padding for better export w/ recent ONNX + pytorch.</li> <li>Refactor dropout args for vit and vit-like models, separate drop_rate into <code>drop_rate</code> (classifier dropout), <code>proj_drop_rate</code> (block mlp / out projections), <code>pos_drop_rate</code> (position embedding drop), <code>attn_drop_rate</code> (attention dropout). 
Also add patch dropout (FLIP) to vit and eva models.</li> <li>fused F.scaled_dot_product_attention support to more vit models, add env var (TIMM_FUSED_ATTN) to control, and config interface to enable/disable</li> <li>Add EVA-CLIP backbones w/ image tower weights, all the way up to 4B param ‘enormous’ model, and 336x336 OpenAI ViT model that was missed.</li></ul> <h3 class="relative group"><a id="april-5-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-5-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 5, 2023</span></h3> <ul data-svelte-h="svelte-jmvfgb"><li>ALL ResNet models pushed to Hugging Face Hub with multi-weight support<ul><li>All past <code>timm</code> trained weights added with recipe based tags to differentiate</li> <li>All ResNet strikes back A1/A2/A3 (seed 0) and R50 example B/C1/C2/D weights available</li> <li>Add torchvision v2 recipe weights to existing torchvision originals</li> <li>See comparison table in <a href="https://huggingface.co/timm/seresnextaa101d_32x8d.sw_in12k_ft_in1k_288#model-comparison" 
rel="nofollow">https://huggingface.co/timm/seresnextaa101d_32x8d.sw_in12k_ft_in1k_288#model-comparison</a></li></ul></li> <li>New ImageNet-12k + ImageNet-1k fine-tunes available for a few anti-aliased ResNet models<ul><li><code>resnetaa50d.sw_in12k_ft_in1k</code> - 81.7 @ 224, 82.6 @ 288</li> <li><code>resnetaa101d.sw_in12k_ft_in1k</code> - 83.5 @ 224, 84.1 @ 288</li> <li><code>seresnextaa101d_32x8d.sw_in12k_ft_in1k</code> - 86.0 @ 224, 86.5 @ 288</li> <li><code>seresnextaa101d_32x8d.sw_in12k_ft_in1k_288</code> - 86.5 @ 288, 86.7 @ 320</li></ul></li></ul> <h3 class="relative group"><a id="march-31-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-31-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 31, 2023</span></h3> <ul data-svelte-h="svelte-1nof9x1"><li>Add first ConvNext-XXLarge CLIP -> IN-1k fine-tune and IN-12k intermediate fine-tunes for convnext-base/large CLIP models.</li></ul> <table data-svelte-h="svelte-beoh8"><thead><tr><th>model</th> <th>top1</th> <th>top5</th> <th>img_size</th> <th>param_count</th> <th>gmacs</th> <th>macts</th></tr></thead> <tbody><tr><td><a 
href="https://huggingface.co/timm/convnext_xxlarge.clip_laion2b_soup_ft_in1k" rel="nofollow">convnext_xxlarge.clip_laion2b_soup_ft_in1k</a></td> <td>88.612</td> <td>98.704</td> <td>256</td> <td>846.47</td> <td>198.09</td> <td>124.45</td></tr> <tr><td>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</td> <td>88.312</td> <td>98.578</td> <td>384</td> <td>200.13</td> <td>101.11</td> <td>126.74</td></tr> <tr><td>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_320</td> <td>87.968</td> <td>98.47</td> <td>320</td> <td>200.13</td> <td>70.21</td> <td>88.02</td></tr> <tr><td>convnext_base.clip_laion2b_augreg_ft_in12k_in1k_384</td> <td>87.138</td> <td>98.212</td> <td>384</td> <td>88.59</td> <td>45.21</td> <td>84.49</td></tr> <tr><td>convnext_base.clip_laion2b_augreg_ft_in12k_in1k</td> <td>86.344</td> <td>97.97</td> <td>256</td> <td>88.59</td> <td>20.09</td> <td>37.55</td></tr></tbody></table> <ul data-svelte-h="svelte-499b69"><li>Add EVA-02 MIM pretrained and fine-tuned weights, push to HF hub and update model cards for all EVA models. First model over 90% top-1 (99% top-5)! 
Check out the original code & weights at <a href="https://github.com/baaivision/EVA" rel="nofollow">https://github.com/baaivision/EVA</a> for more details on their work blending MIM, CLIP w/ many model, dataset, and train recipe tweaks.</li></ul> <table data-svelte-h="svelte-12ymbo6"><thead><tr><th>model</th> <th>top1</th> <th>top5</th> <th>param_count</th> <th>img_size</th></tr></thead> <tbody><tr><td><a href="https://huggingface.co/timm/eva02_large_patch14_448.mim_m38m_ft_in22k_in1k" rel="nofollow">eva02_large_patch14_448.mim_m38m_ft_in22k_in1k</a></td> <td>90.054</td> <td>99.042</td> <td>305.08</td> <td>448</td></tr> <tr><td>eva02_large_patch14_448.mim_in22k_ft_in22k_in1k</td> <td>89.946</td> <td>99.01</td> <td>305.08</td> <td>448</td></tr> <tr><td>eva_giant_patch14_560.m30m_ft_in22k_in1k</td> <td>89.792</td> <td>98.992</td> <td>1014.45</td> <td>560</td></tr> <tr><td>eva02_large_patch14_448.mim_in22k_ft_in1k</td> <td>89.626</td> <td>98.954</td> <td>305.08</td> <td>448</td></tr> <tr><td>eva02_large_patch14_448.mim_m38m_ft_in1k</td> <td>89.57</td> <td>98.918</td> <td>305.08</td> <td>448</td></tr> <tr><td>eva_giant_patch14_336.m30m_ft_in22k_in1k</td> <td>89.56</td> <td>98.956</td> <td>1013.01</td> <td>336</td></tr> <tr><td>eva_giant_patch14_336.clip_ft_in1k</td> <td>89.466</td> <td>98.82</td> <td>1013.01</td> <td>336</td></tr> <tr><td>eva_large_patch14_336.in22k_ft_in22k_in1k</td> <td>89.214</td> <td>98.854</td> <td>304.53</td> <td>336</td></tr> <tr><td>eva_giant_patch14_224.clip_ft_in1k</td> <td>88.882</td> <td>98.678</td> <td>1012.56</td> <td>224</td></tr> <tr><td>eva02_base_patch14_448.mim_in22k_ft_in22k_in1k</td> <td>88.692</td> <td>98.722</td> <td>87.12</td> <td>448</td></tr> <tr><td>eva_large_patch14_336.in22k_ft_in1k</td> <td>88.652</td> <td>98.722</td> <td>304.53</td> <td>336</td></tr> <tr><td>eva_large_patch14_196.in22k_ft_in22k_in1k</td> <td>88.592</td> <td>98.656</td> <td>304.14</td> <td>196</td></tr> <tr><td>eva02_base_patch14_448.mim_in22k_ft_in1k</td> 
<td>88.23</td> <td>98.564</td> <td>87.12</td> <td>448</td></tr> <tr><td>eva_large_patch14_196.in22k_ft_in1k</td> <td>87.934</td> <td>98.504</td> <td>304.14</td> <td>196</td></tr> <tr><td>eva02_small_patch14_336.mim_in22k_ft_in1k</td> <td>85.74</td> <td>97.614</td> <td>22.13</td> <td>336</td></tr> <tr><td>eva02_tiny_patch14_336.mim_in22k_ft_in1k</td> <td>80.658</td> <td>95.524</td> <td>5.76</td> <td>336</td></tr></tbody></table> <ul data-svelte-h="svelte-1n8fcr3"><li>Multi-weight and HF hub for DeiT and MLP-Mixer based models</li></ul> <h3 class="relative group"><a id="march-22-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-22-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 22, 2023</span></h3> <ul data-svelte-h="svelte-1opkf16"><li>More weights pushed to HF hub along with multi-weight support, including: <code>regnet.py</code>, <code>rexnet.py</code>, <code>byobnet.py</code>, <code>resnetv2.py</code>, <code>swin_transformer.py</code>, <code>swin_transformer_v2.py</code>, <code>swin_transformer_v2_cr.py</code></li> <li>Swin Transformer models support feature extraction (NCHW feat maps for 
<code>swinv2_cr_</code>, and NHWC for all others) and spatial embedding outputs.</li> <li>FocalNet (from <a href="https://github.com/microsoft/FocalNet" rel="nofollow">https://github.com/microsoft/FocalNet</a>) models and weights added with significant refactoring, feature extraction, no fixed resolution / sizing constraint</li> <li>RegNet weights increased with HF hub push, SWAG, SEER, and torchvision v2 weights. SEER is pretty poor with respect to performance for model size, but possibly useful.</li> <li>More ImageNet-12k pretrained and 1k fine-tuned <code>timm</code> weights:<ul><li><code>rexnetr_200.sw_in12k_ft_in1k</code> - 82.6 @ 224, 83.2 @ 288</li> <li><code>rexnetr_300.sw_in12k_ft_in1k</code> - 84.0 @ 224, 84.5 @ 288</li> <li><code>regnety_120.sw_in12k_ft_in1k</code> - 85.0 @ 224, 85.4 @ 288</li> <li><code>regnety_160.lion_in12k_ft_in1k</code> - 85.6 @ 224, 86.0 @ 288</li> <li><code>regnety_160.sw_in12k_ft_in1k</code> - 85.6 @ 224, 86.0 @ 288 (compare to SWAG PT + 1k FT this is same BUT much lower res, blows SEER FT away)</li></ul></li> <li>Model name deprecation + remapping functionality added (a milestone for bringing 0.8.x out of pre-release). 
Mappings being added…</li> <li>Minor bug fixes and improvements.</li></ul> <h3 class="relative group"><a id="feb-26-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-26-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 26, 2023</span></h3> <ul data-svelte-h="svelte-kdfxn"><li>Add ConvNeXt-XXLarge CLIP pretrained image tower weights for fine-tune & features (fine-tuning TBD) — see <a href="https://huggingface.co/laion/CLIP-convnext_xxlarge-laion2B-s34B-b82K-augreg-soup" rel="nofollow">model card</a></li> <li>Update <code>convnext_xxlarge</code> default LayerNorm eps to 1e-5 (for CLIP weights, improved stability)</li> <li>0.8.15dev0</li></ul> <h3 class="relative group"><a id="feb-20-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-20-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 
88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 20, 2023</span></h3> <ul data-svelte-h="svelte-t8szp7"><li>Add 320x320 <code>convnext_large_mlp.clip_laion2b_ft_320</code> and <code>convnext_large_mlp.clip_laion2b_ft_soup_320</code> CLIP image tower weights for features & fine-tune</li> <li>0.8.13dev0 pypi release for latest changes w/ move to huggingface org</li></ul> <h3 class="relative group"><a id="feb-16-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-16-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 16, 2023</span></h3> <ul data-svelte-h="svelte-1n2berl"><li><code>safetensor</code> checkpoint support 
added</li> <li>Add ideas from ‘Scaling Vision Transformers to 22 B. Params’ (<a href="https://arxiv.org/abs/2302.05442" rel="nofollow">https://arxiv.org/abs/2302.05442</a>) — qk norm, RmsNorm, parallel block</li> <li>Add F.scaled_dot_product_attention support (PyTorch 2.0 only) to <code>vit_*</code>, <code>vit_relpos_*</code>, <code>coatnet</code> / <code>maxxvit</code> (to start)</li> <li>Lion optimizer (w/ multi-tensor option) added (<a href="https://arxiv.org/abs/2302.06675" rel="nofollow">https://arxiv.org/abs/2302.06675</a>)</li> <li>gradient checkpointing works with <code>features_only=True</code></li></ul> <h3 class="relative group"><a id="feb-7-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-7-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 7, 2023</span></h3> <ul data-svelte-h="svelte-r90jdl"><li>New inference benchmark numbers added in <a href="results/">results</a> folder.</li> <li>Add convnext LAION CLIP trained weights and initial set of in1k fine-tunes<ul><li><code>convnext_base.clip_laion2b_augreg_ft_in1k</code> - 86.2% @ 256x256</li> 
<li><code>convnext_base.clip_laiona_augreg_ft_in1k_384</code> - 86.5% @ 384x384</li> <li><code>convnext_large_mlp.clip_laion2b_augreg_ft_in1k</code> - 87.3% @ 256x256</li> <li><code>convnext_large_mlp.clip_laion2b_augreg_ft_in1k_384</code> - 87.9% @ 384x384</li></ul></li> <li>Add DaViT models. Supports <code>features_only=True</code>. Adapted from <a href="https://github.com/dingmyu/davit" rel="nofollow">https://github.com/dingmyu/davit</a> by <a href="https://github.com/fffffgggg54" rel="nofollow">Fredo</a>.</li> <li>Use a common NormMlpClassifierHead across MaxViT, ConvNeXt, DaViT</li> <li>Add EfficientFormer-V2 model, update EfficientFormer, and refactor LeViT (closely related architectures). Weights on HF hub.<ul><li>New EfficientFormer-V2 arch, significant refactor from original at (<a href="https://github.com/snap-research/EfficientFormer" rel="nofollow">https://github.com/snap-research/EfficientFormer</a>). Supports <code>features_only=True</code>.</li> <li>Minor updates to EfficientFormer.</li> <li>Refactor LeViT models to stages, add <code>features_only=True</code> support to new <code>conv</code> variants, weight remap required.</li></ul></li> <li>Move ImageNet meta-data (synsets, indices) from <code>/results</code> to <a href="timm/data/_info/"><code>timm/data/_info</code></a>.</li> <li>Add ImageNetInfo / DatasetInfo classes to provide labelling for various ImageNet classifier layouts in <code>timm</code><ul><li>Update <code>inference.py</code> to use, try: <code>python inference.py --data-dir /folder/to/images --model convnext_small.in12k --label-type detail --topk 5</code></li></ul></li> <li>Ready for 0.8.10 pypi pre-release (final testing).</li></ul> <h3 class="relative group"><a id="jan-20-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-20-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 20, 2023</span></h3> <ul data-svelte-h="svelte-3zs485"><li><p>Add two convnext 12k -> 1k fine-tunes at 384x384</p> <ul><li><code>convnext_tiny.in12k_ft_in1k_384</code> - 85.1 @ 384</li> <li><code>convnext_small.in12k_ft_in1k_384</code> - 86.2 @ 384</li></ul></li> <li><p>Push all MaxxViT weights to HF hub, and add new ImageNet-12k -> 1k fine-tunes for <code>rw</code> base MaxViT and CoAtNet 1/2 models</p></li></ul> <table data-svelte-h="svelte-1jw3h7v"><thead><tr><th>model</th> <th align="right">top1</th> <th align="right">top5</th> <th align="right">samples / sec</th> <th align="right">Params (M)</th> <th align="right">GMAC</th> <th align="right">Act (M)</th></tr></thead> <tbody><tr><td><a href="https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k" rel="nofollow">maxvit_xlarge_tf_512.in21k_ft_in1k</a></td> <td align="right">88.53</td> <td align="right">98.64</td> <td align="right">21.76</td> <td align="right">475.77</td> <td align="right">534.14</td> <td align="right">1413.22</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k" rel="nofollow">maxvit_xlarge_tf_384.in21k_ft_in1k</a></td> <td align="right">88.32</td> <td align="right">98.54</td> <td align="right">42.53</td> <td align="right">475.32</td> <td 
align="right">292.78</td> <td align="right">668.76</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k" rel="nofollow">maxvit_base_tf_512.in21k_ft_in1k</a></td> <td align="right">88.20</td> <td align="right">98.53</td> <td align="right">50.87</td> <td align="right">119.88</td> <td align="right">138.02</td> <td align="right">703.99</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k" rel="nofollow">maxvit_large_tf_512.in21k_ft_in1k</a></td> <td align="right">88.04</td> <td align="right">98.40</td> <td align="right">36.42</td> <td align="right">212.33</td> <td align="right">244.75</td> <td align="right">942.15</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k" rel="nofollow">maxvit_large_tf_384.in21k_ft_in1k</a></td> <td align="right">87.98</td> <td align="right">98.56</td> <td align="right">71.75</td> <td align="right">212.03</td> <td align="right">132.55</td> <td align="right">445.84</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k" rel="nofollow">maxvit_base_tf_384.in21k_ft_in1k</a></td> <td align="right">87.92</td> <td align="right">98.54</td> <td align="right">104.71</td> <td align="right">119.65</td> <td align="right">73.80</td> <td align="right">332.90</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k" rel="nofollow">maxvit_rmlp_base_rw_384.sw_in12k_ft_in1k</a></td> <td align="right">87.81</td> <td align="right">98.37</td> <td align="right">106.55</td> <td align="right">116.14</td> <td align="right">70.97</td> <td align="right">318.95</td></tr> <tr><td><a href="https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k" rel="nofollow">maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1k</a></td> <td align="right">87.47</td> <td align="right">98.37</td> <td align="right">149.49</td> <td align="right">116.09</td> <td align="right">72.98</td> <td 
align="right">213.74</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k" rel="nofollow">coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k</a></td> <td align="right">87.39</td> <td align="right">98.31</td> <td align="right">160.80</td> <td align="right">73.88</td> <td align="right">47.69</td> <td align="right">209.43</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k" rel="nofollow">maxvit_rmlp_base_rw_224.sw_in12k_ft_in1k</a></td> <td align="right">86.89</td> <td align="right">98.02</td> <td align="right">375.86</td> <td align="right">116.14</td> <td align="right">23.15</td> <td align="right">92.64</td></tr> <tr><td><a href="https://huggingface.co/timm/maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k" rel="nofollow">maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1k</a></td> <td align="right">86.64</td> <td align="right">98.02</td> <td align="right">501.03</td> <td align="right">116.09</td> <td align="right">24.20</td> <td align="right">62.77</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_base_tf_512.in1k" rel="nofollow">maxvit_base_tf_512.in1k</a></td> <td align="right">86.60</td> <td align="right">97.92</td> <td align="right">50.75</td> <td align="right">119.88</td> <td align="right">138.02</td> <td align="right">703.99</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_2_rw_224.sw_in12k_ft_in1k" rel="nofollow">coatnet_2_rw_224.sw_in12k_ft_in1k</a></td> <td align="right">86.57</td> <td align="right">97.89</td> <td align="right">631.88</td> <td align="right">73.87</td> <td align="right">15.09</td> <td align="right">49.22</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_large_tf_512.in1k" rel="nofollow">maxvit_large_tf_512.in1k</a></td> <td align="right">86.52</td> <td align="right">97.88</td> <td align="right">36.04</td> <td align="right">212.33</td> <td align="right">244.75</td> <td align="right">942.15</td></tr> <tr><td><a 
href="https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k" rel="nofollow">coatnet_rmlp_2_rw_224.sw_in12k_ft_in1k</a></td> <td align="right">86.49</td> <td align="right">97.90</td> <td align="right">620.58</td> <td align="right">73.88</td> <td align="right">15.18</td> <td align="right">54.78</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_base_tf_384.in1k" rel="nofollow">maxvit_base_tf_384.in1k</a></td> <td align="right">86.29</td> <td align="right">97.80</td> <td align="right">101.09</td> <td align="right">119.65</td> <td align="right">73.80</td> <td align="right">332.90</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_large_tf_384.in1k" rel="nofollow">maxvit_large_tf_384.in1k</a></td> <td align="right">86.23</td> <td align="right">97.69</td> <td align="right">70.56</td> <td align="right">212.03</td> <td align="right">132.55</td> <td align="right">445.84</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_small_tf_512.in1k" rel="nofollow">maxvit_small_tf_512.in1k</a></td> <td align="right">86.10</td> <td align="right">97.76</td> <td align="right">88.63</td> <td align="right">69.13</td> <td align="right">67.26</td> <td align="right">383.77</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_tiny_tf_512.in1k" rel="nofollow">maxvit_tiny_tf_512.in1k</a></td> <td align="right">85.67</td> <td align="right">97.58</td> <td align="right">144.25</td> <td align="right">31.05</td> <td align="right">33.49</td> <td align="right">257.59</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_small_tf_384.in1k" rel="nofollow">maxvit_small_tf_384.in1k</a></td> <td align="right">85.54</td> <td align="right">97.46</td> <td align="right">188.35</td> <td align="right">69.02</td> <td align="right">35.87</td> <td align="right">183.65</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_tiny_tf_384.in1k" rel="nofollow">maxvit_tiny_tf_384.in1k</a></td> <td align="right">85.11</td> <td align="right">97.38</td> <td 
align="right">293.46</td> <td align="right">30.98</td> <td align="right">17.53</td> <td align="right">123.42</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_large_tf_224.in1k" rel="nofollow">maxvit_large_tf_224.in1k</a></td> <td align="right">84.93</td> <td align="right">96.97</td> <td align="right">247.71</td> <td align="right">211.79</td> <td align="right">43.68</td> <td align="right">127.35</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k" rel="nofollow">coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1k</a></td> <td align="right">84.90</td> <td align="right">96.96</td> <td align="right">1025.45</td> <td align="right">41.72</td> <td align="right">8.11</td> <td align="right">40.13</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_base_tf_224.in1k" rel="nofollow">maxvit_base_tf_224.in1k</a></td> <td align="right">84.85</td> <td align="right">96.99</td> <td align="right">358.25</td> <td align="right">119.47</td> <td align="right">24.04</td> <td align="right">95.01</td></tr> <tr><td><a href="https://huggingface.co/timm/maxxvit_rmlp_small_rw_256.sw_in1k" rel="nofollow">maxxvit_rmlp_small_rw_256.sw_in1k</a></td> <td align="right">84.63</td> <td align="right">97.06</td> <td align="right">575.53</td> <td align="right">66.01</td> <td align="right">14.67</td> <td align="right">58.38</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_rmlp_2_rw_224.sw_in1k" rel="nofollow">coatnet_rmlp_2_rw_224.sw_in1k</a></td> <td align="right">84.61</td> <td align="right">96.74</td> <td align="right">625.81</td> <td align="right">73.88</td> <td align="right">15.18</td> <td align="right">54.78</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_small_rw_224.sw_in1k" rel="nofollow">maxvit_rmlp_small_rw_224.sw_in1k</a></td> <td align="right">84.49</td> <td align="right">96.76</td> <td align="right">693.82</td> <td align="right">64.90</td> <td align="right">10.75</td> <td align="right">49.30</td></tr> 
<tr><td><a href="https://huggingface.co/timm/maxvit_small_tf_224.in1k" rel="nofollow">maxvit_small_tf_224.in1k</a></td> <td align="right">84.43</td> <td align="right">96.83</td> <td align="right">647.96</td> <td align="right">68.93</td> <td align="right">11.66</td> <td align="right">53.17</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_tiny_rw_256.sw_in1k" rel="nofollow">maxvit_rmlp_tiny_rw_256.sw_in1k</a></td> <td align="right">84.23</td> <td align="right">96.78</td> <td align="right">807.21</td> <td align="right">29.15</td> <td align="right">6.77</td> <td align="right">46.92</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_1_rw_224.sw_in1k" rel="nofollow">coatnet_1_rw_224.sw_in1k</a></td> <td align="right">83.62</td> <td align="right">96.38</td> <td align="right">989.59</td> <td align="right">41.72</td> <td align="right">8.04</td> <td align="right">34.60</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_tiny_rw_224.sw_in1k" rel="nofollow">maxvit_tiny_rw_224.sw_in1k</a></td> <td align="right">83.50</td> <td align="right">96.50</td> <td align="right">1100.53</td> <td align="right">29.06</td> <td align="right">5.11</td> <td align="right">33.11</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_tiny_tf_224.in1k" rel="nofollow">maxvit_tiny_tf_224.in1k</a></td> <td align="right">83.41</td> <td align="right">96.59</td> <td align="right">1004.94</td> <td align="right">30.92</td> <td align="right">5.60</td> <td align="right">35.78</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_rmlp_1_rw_224.sw_in1k" rel="nofollow">coatnet_rmlp_1_rw_224.sw_in1k</a></td> <td align="right">83.36</td> <td align="right">96.45</td> <td align="right">1093.03</td> <td align="right">41.69</td> <td align="right">7.85</td> <td align="right">35.47</td></tr> <tr><td><a href="https://huggingface.co/timm/maxxvitv2_nano_rw_256.sw_in1k" rel="nofollow">maxxvitv2_nano_rw_256.sw_in1k</a></td> <td align="right">83.11</td> <td 
align="right">96.33</td> <td align="right">1276.88</td> <td align="right">23.70</td> <td align="right">6.26</td> <td align="right">23.05</td></tr> <tr><td><a href="https://huggingface.co/timm/maxxvit_rmlp_nano_rw_256.sw_in1k" rel="nofollow">maxxvit_rmlp_nano_rw_256.sw_in1k</a></td> <td align="right">83.03</td> <td align="right">96.34</td> <td align="right">1341.24</td> <td align="right">16.78</td> <td align="right">4.37</td> <td align="right">26.05</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_nano_rw_256.sw_in1k" rel="nofollow">maxvit_rmlp_nano_rw_256.sw_in1k</a></td> <td align="right">82.96</td> <td align="right">96.26</td> <td align="right">1283.24</td> <td align="right">15.50</td> <td align="right">4.47</td> <td align="right">31.92</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_nano_rw_256.sw_in1k" rel="nofollow">maxvit_nano_rw_256.sw_in1k</a></td> <td align="right">82.93</td> <td align="right">96.23</td> <td align="right">1218.17</td> <td align="right">15.45</td> <td align="right">4.46</td> <td align="right">30.28</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_bn_0_rw_224.sw_in1k" rel="nofollow">coatnet_bn_0_rw_224.sw_in1k</a></td> <td align="right">82.39</td> <td align="right">96.19</td> <td align="right">1600.14</td> <td align="right">27.44</td> <td align="right">4.67</td> <td align="right">22.04</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_0_rw_224.sw_in1k" rel="nofollow">coatnet_0_rw_224.sw_in1k</a></td> <td align="right">82.39</td> <td align="right">95.84</td> <td align="right">1831.21</td> <td align="right">27.44</td> <td align="right">4.43</td> <td align="right">18.73</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_rmlp_nano_rw_224.sw_in1k" rel="nofollow">coatnet_rmlp_nano_rw_224.sw_in1k</a></td> <td align="right">82.05</td> <td align="right">95.87</td> <td align="right">2109.09</td> <td align="right">15.15</td> <td align="right">2.62</td> <td 
align="right">20.34</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnext_nano_rw_224.sw_in1k" rel="nofollow">coatnext_nano_rw_224.sw_in1k</a></td> <td align="right">81.95</td> <td align="right">95.92</td> <td align="right">2525.52</td> <td align="right">14.70</td> <td align="right">2.47</td> <td align="right">12.80</td></tr> <tr><td><a href="https://huggingface.co/timm/coatnet_nano_rw_224.sw_in1k" rel="nofollow">coatnet_nano_rw_224.sw_in1k</a></td> <td align="right">81.70</td> <td align="right">95.64</td> <td align="right">2344.52</td> <td align="right">15.14</td> <td align="right">2.41</td> <td align="right">15.41</td></tr> <tr><td><a href="https://huggingface.co/timm/maxvit_rmlp_pico_rw_256.sw_in1k" rel="nofollow">maxvit_rmlp_pico_rw_256.sw_in1k</a></td> <td align="right">80.53</td> <td align="right">95.21</td> <td align="right">1594.71</td> <td align="right">7.52</td> <td align="right">1.85</td> <td align="right">24.86</td></tr></tbody></table> <h3 class="relative group"><a id="jan-11-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-11-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 11, 
2023</span></h3> <ul data-svelte-h="svelte-15wsr73"><li>Update ConvNeXt ImageNet-12k pretrain series w/ two new fine-tuned weights (and pre FT <code>.in12k</code> tags)<ul><li><code>convnext_nano.in12k_ft_in1k</code> - 82.3 @ 224, 82.9 @ 288 (previously released)</li> <li><code>convnext_tiny.in12k_ft_in1k</code> - 84.2 @ 224, 84.5 @ 288</li> <li><code>convnext_small.in12k_ft_in1k</code> - 85.2 @ 224, 85.3 @ 288</li></ul></li></ul> <h3 class="relative group"><a id="jan-6-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-6-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 6, 2023</span></h3> <ul data-svelte-h="svelte-11mtsqq"><li>Finally got around to adding <code>--model-kwargs</code> and <code>--opt-kwargs</code> to scripts to pass through rare args directly to model classes from cmd line<ul><li><code>train.py --data-dir /imagenet --model resnet50 --amp --model-kwargs output_stride=16 act_layer=silu</code></li> <li><code>train.py --data-dir /imagenet --model vit_base_patch16_clip_224 --img-size 240 --amp --model-kwargs img_size=240 patch_size=12</code></li></ul></li> <li>Cleanup some popular models to better 
support arg passthrough / merge with model configs, more to go.</li></ul> <h3 class="relative group"><a id="jan-5-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-5-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 5, 2023</span></h3> <ul data-svelte-h="svelte-v6i5yp"><li>ConvNeXt-V2 models and weights added to existing <code>convnext.py</code><ul><li>Paper: <a href="http://arxiv.org/abs/2301.00808" rel="nofollow">ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders</a></li> <li>Reference impl: <a href="https://github.com/facebookresearch/ConvNeXt-V2" rel="nofollow">https://github.com/facebookresearch/ConvNeXt-V2</a> (NOTE: weights currently CC-BY-NC)
</li></ul></li></ul> <h3 class="relative group"><a id="dec-23-2022-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-23-2022-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 23, 2022 🎄☃</span></h3> <ul data-svelte-h="svelte-hz5yxo"><li>Add FlexiViT models and weights from <a href="https://github.com/google-research/big_vision" rel="nofollow">https://github.com/google-research/big_vision</a> (check out paper at <a href="https://arxiv.org/abs/2212.08013" rel="nofollow">https://arxiv.org/abs/2212.08013</a>)<ul><li>NOTE currently resizing is static on model creation, on-the-fly dynamic / train patch size sampling is a WIP</li></ul></li> <li>Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer, beit)</li> <li>More model pretrained tag and adjustments, some model names changed (working on deprecation translations, consider main branch DEV branch right now, use 0.6.x for stable use)</li> <li>More ImageNet-12k (subset of 22k) pretrain models popping up:<ul><li><code>efficientnet_b5.in12k_ft_in1k</code> - 85.9 @ 448x448</li> 
<li><code>vit_medium_patch16_gap_384.in12k_ft_in1k</code> - 85.5 @ 384x384</li> <li><code>vit_medium_patch16_gap_256.in12k_ft_in1k</code> - 84.5 @ 256x256</li> <li><code>convnext_nano.in12k_ft_in1k</code> - 82.9 @ 288x288</li></ul></li></ul> <h3 class="relative group"><a id="dec-8-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-8-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 8, 2022</span></h3> <ul data-svelte-h="svelte-1ybsdar"><li>Add ‘EVA l’ to <code>vision_transformer.py</code>, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)<ul><li>original source: <a href="https://github.com/baaivision/EVA" rel="nofollow">https://github.com/baaivision/EVA</a></li></ul></li></ul> <table data-svelte-h="svelte-1kdm8f8"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">eva_large_patch14_336.in22k_ft_in22k_in1k</td> <td align="right">89.2</td> <td align="right">304.5</td> <td 
align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_336.in22k_ft_in1k</td> <td align="right">88.7</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_196.in22k_ft_in22k_in1k</td> <td align="right">88.6</td> <td align="right">304.1</td> <td align="right">61.6</td> <td align="right">63.5</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_196.in22k_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.1</td> <td align="right">61.6</td> <td align="right">63.5</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="dec-6-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-6-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 6, 
2022</span></h3> <ul data-svelte-h="svelte-90svo3"><li>Add ‘EVA g’, BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to <code>beit.py</code>.<ul><li>original source: <a href="https://github.com/baaivision/EVA" rel="nofollow">https://github.com/baaivision/EVA</a></li> <li>paper: <a href="https://arxiv.org/abs/2211.07636" rel="nofollow">https://arxiv.org/abs/2211.07636</a></li></ul></li></ul> <table data-svelte-h="svelte-1jlkusq"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">eva_giant_patch14_560.m30m_ft_in22k_in1k</td> <td align="right">89.8</td> <td align="right">1014.4</td> <td align="right">1906.8</td> <td align="right">2577.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_336.m30m_ft_in22k_in1k</td> <td align="right">89.6</td> <td align="right">1013</td> <td align="right">620.6</td> <td align="right">550.7</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_336.clip_ft_in1k</td> <td align="right">89.4</td> <td align="right">1013</td> <td align="right">620.6</td> <td align="right">550.7</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_224.clip_ft_in1k</td> <td align="right">89.1</td> <td align="right">1012.6</td> <td align="right">267.2</td> <td align="right">192.6</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="dec-5-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#dec-5-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 5, 2022</span></h3> <ul data-svelte-h="svelte-1f4joci"><li>Pre-release (<code>0.8.0dev0</code>) of multi-weight support (<code>model_arch.pretrained_tag</code>). Install with <code>pip install --pre timm</code><ul><li>vision_transformer, maxvit, convnext are the first three model impl w/ support</li> <li>model names are changing with this (previous _21k, etc. 
fn will merge), still sorting out deprecation handling</li> <li>bugs are likely, but I need feedback so please try it out</li> <li>if stability is needed, please use 0.6.x pypi releases or clone from <a href="https://github.com/rwightman/pytorch-image-models/tree/0.6.x" rel="nofollow">0.6.x branch</a></li></ul></li> <li>Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use <code>--torchcompile</code> argument</li> <li>Inference script allows more control over output, select k for top-class index + prob json, csv or parquet output</li> <li>Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models</li></ul> <table data-svelte-h="svelte-1w1dpjo"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k</td> <td align="right">88.6</td> <td align="right">632.5</td> <td align="right">391</td> <td align="right">407.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.openai_ft_in12k_in1k</td> <td align="right">88.3</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">88.2</td> <td align="right">632</td> <td align="right">167.4</td> <td align="right">139.4</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.laion2b_ft_in12k_in1k</td> <td 
align="right">88.2</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.openai_ft_in12k_in1k</td> <td align="right">88.2</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">87.9</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.openai_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.laion2b_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_huge_patch14_clip_224.laion2b_ft_in1k</td> <td align="right">87.6</td> <td align="right">632</td> <td align="right">167.4</td> <td align="right">139.4</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.laion2b_ft_in1k</td> <td align="right">87.3</td> <td align="right">304.2</td> <td align="right">81.1</td> <td 
align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.laion2b_ft_in12k_in1k</td> <td align="right">87.2</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.openai_ft_in12k_in1k</td> <td align="right">87</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.laion2b_ft_in1k</td> <td align="right">86.6</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.openai_ft_in1k</td> <td align="right">86.2</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">86.2</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.openai_ft_in12k_in1k</td> <td align="right">85.9</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a 
href="https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_448.laion2b_ft_in12k_in1k</td> <td align="right">85.8</td> <td align="right">88.3</td> <td align="right">17.9</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.laion2b_ft_in1k</td> <td align="right">85.5</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_384.laion2b_ft_in12k_in1k</td> <td align="right">85.4</td> <td align="right">88.3</td> <td align="right">13.1</td> <td align="right">16.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.openai_ft_in1k</td> <td align="right">85.3</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_384.openai_ft_in12k_in1k</td> <td align="right">85.2</td> <td align="right">88.3</td> <td align="right">13.1</td> <td align="right">16.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">83.3</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k" 
rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.laion2b_ft_in1k</td> <td align="right">82.6</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.openai_ft_in1k</td> <td align="right">81.9</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr></tbody></table> <ul data-svelte-h="svelte-mr00b7"><li>Port of MaxViT Tensorflow Weights from official impl at <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a><ul><li>There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possible detail missing, but the 21k FT did seem sensitive to small preprocessing</li></ul></li></ul> <table data-svelte-h="svelte-uhceid"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">maxvit_xlarge_tf_512.in21k_ft_in1k</td> <td align="right">88.5</td> <td align="right">475.8</td> <td align="right">534.1</td> <td align="right">1413.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_xlarge_tf_384.in21k_ft_in1k</td> <td align="right">88.3</td> <td align="right">475.3</td> <td align="right">292.8</td> <td align="right">668.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_512.in21k_ft_in1k</td> <td align="right">88.2</td> <td 
align="right">119.9</td> <td align="right">138</td> <td align="right">704</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_512.in21k_ft_in1k</td> <td align="right">88</td> <td align="right">212.3</td> <td align="right">244.8</td> <td align="right">942.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_384.in21k_ft_in1k</td> <td align="right">88</td> <td align="right">212</td> <td align="right">132.6</td> <td align="right">445.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_384.in21k_ft_in1k</td> <td align="right">87.9</td> <td align="right">119.6</td> <td align="right">73.8</td> <td align="right">332.9</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_512.in1k</td> <td align="right">86.6</td> <td align="right">119.9</td> <td align="right">138</td> <td align="right">704</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_512.in1k</td> <td align="right">86.5</td> <td align="right">212.3</td> <td align="right">244.8</td> <td align="right">942.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_384.in1k</td> <td align="right">86.3</td> <td align="right">119.6</td> <td align="right">73.8</td> <td align="right">332.9</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_384.in1k</td> <td align="right">86.2</td> <td 
align="right">212</td> <td align="right">132.6</td> <td align="right">445.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_512.in1k</td> <td align="right">86.1</td> <td align="right">69.1</td> <td align="right">67.3</td> <td align="right">383.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_512.in1k</td> <td align="right">85.7</td> <td align="right">31</td> <td align="right">33.5</td> <td align="right">257.6</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_384.in1k</td> <td align="right">85.5</td> <td align="right">69</td> <td align="right">35.9</td> <td align="right">183.6</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_384.in1k</td> <td align="right">85.1</td> <td align="right">31</td> <td align="right">17.5</td> <td align="right">123.4</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_224.in1k</td> <td align="right">84.9</td> <td align="right">211.8</td> <td align="right">43.7</td> <td align="right">127.4</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_224.in1k</td> <td align="right">84.9</td> <td align="right">119.5</td> <td align="right">24</td> <td align="right">95</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_224.in1k</td> <td align="right">84.4</td> <td align="right">68.9</td> <td align="right">11.7</td> <td 
align="right">53.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_224.in1k</td> <td align="right">83.4</td> <td align="right">30.9</td> <td align="right">5.6</td> <td align="right">35.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_224.in1k" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="oct-15-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#oct-15-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Oct 15, 2022</span></h3> <ul data-svelte-h="svelte-a95su3"><li>Train and validation script enhancements</li> <li>Non-GPU (ie CPU) device support</li> <li>SLURM compatibility for train script</li> <li>HF datasets support (via ReaderHfds)</li> <li>TFDS/WDS dataloading improvements (sample padding/wrap for distributed use fixed wrt sample count estimate)</li> <li>in_chans !=3 support for scripts / loader</li> <li>Adan optimizer</li> <li>Can enable per-step LR scheduling via args</li> <li>Dataset ‘parsers’ renamed to ‘readers’, more descriptive of 
purpose</li> <li>AMP args changed, APEX via <code>--amp-impl apex</code>, bfloat16 supported via <code>--amp-dtype bfloat16</code></li> <li>main branch switched to 0.7.x version, 0.6.x forked for stable release of weight only adds</li> <li>master -> main branch rename</li></ul> <h3 class="relative group"><a id="oct-10-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#oct-10-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Oct 10, 2022</span></h3> <ul data-svelte-h="svelte-w97o2d"><li>More weights in <code>maxxvit</code> series, incl first ConvNeXt block based <code>coatnext</code> and <code>maxxvit</code> experiments:<ul><li><code>coatnext_nano_rw_224</code> - 82.0 @ 224 (G) — (uses ConvNeXt conv block, no BatchNorm)</li> <li><code>maxxvit_rmlp_nano_rw_256</code> - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)</li> <li><code>maxvit_rmlp_small_rw_224</code> - 84.5 @ 224, 85.1 @ 320 (G)</li> <li><code>maxxvit_rmlp_small_rw_256</code> - 84.6 @ 256, 84.9 @ 288 (G) — could be trained better, hparams need tuning (uses ConvNeXt block, no BN)</li> <li><code>coatnet_rmlp_2_rw_224</code> - 84.6 @ 224, 85 @ 320 
(T)</li> <li>NOTE: official MaxVit weights (in1k) have been released at <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a> — some extra work is needed to port and adapt since my impl was created independently of theirs and has a few small differences + the whole TF same padding fun.</li></ul></li></ul> <h3 class="relative group"><a id="sept-23-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sept-23-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sept 23, 2022</span></h3> <ul data-svelte-h="svelte-1dx7og"><li>LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)<ul><li>vit_base_patch32_224_clip_laion2b</li> <li>vit_large_patch14_224_clip_laion2b</li> <li>vit_huge_patch14_224_clip_laion2b</li> <li>vit_giant_patch14_224_clip_laion2b</li></ul></li></ul> <h3 class="relative group"><a id="sept-7-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sept-7-2022"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sept 7, 2022</span></h3> <ul data-svelte-h="svelte-8zju9s"><li>Hugging Face <a href="https://huggingface.co/docs/hub/timm" rel="nofollow"><code>timm</code> docs</a> home now exists, look for more here in the future</li> <li>Add BEiT-v2 weights for base and large 224x224 models from <a href="https://github.com/microsoft/unilm/tree/master/beit2" rel="nofollow">https://github.com/microsoft/unilm/tree/master/beit2</a></li> <li>Add more weights in <code>maxxvit</code> series incl a <code>pico</code> (7.5M params, 1.9 GMACs), two <code>tiny</code> variants:<ul><li><code>maxvit_rmlp_pico_rw_256</code> - 80.5 @ 256, 81.3 @ 320 (T)</li> <li><code>maxvit_tiny_rw_224</code> - 83.5 @ 224 (G)</li> <li><code>maxvit_rmlp_tiny_rw_256</code> - 84.2 @ 256, 84.8 @ 320 (T)</li></ul></li></ul> <h3 class="relative group"><a id="aug-29-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-29-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path 
d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 29, 2022</span></h3> <ul data-svelte-h="svelte-e4xo5p"><li>MaxVit window size scales with img_size by default. Add new RelPosMlp MaxViT weight that leverages this:<ul><li><code>maxvit_rmlp_nano_rw_256</code> - 83.0 @ 256, 83.6 @ 320 (T)</li></ul></li></ul> <h3 class="relative group"><a id="aug-26-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-26-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 26, 2022</span></h3> <ul data-svelte-h="svelte-1ho7x52"><li>CoAtNet (<a href="https://arxiv.org/abs/2106.04803" rel="nofollow">https://arxiv.org/abs/2106.04803</a>) 
and MaxVit (<a href="https://arxiv.org/abs/2204.01697" rel="nofollow">https://arxiv.org/abs/2204.01697</a>) <code>timm</code> original models<ul><li>both found in <a href="https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/maxxvit.py" rel="nofollow"><code>maxxvit.py</code></a> model def, contains numerous experiments outside scope of original papers</li> <li>an unfinished Tensorflow version from MaxVit authors can be found <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a></li></ul></li> <li>Initial CoAtNet and MaxVit timm pretrained weights (working on more):<ul><li><code>coatnet_nano_rw_224</code> - 81.7 @ 224 (T)</li> <li><code>coatnet_rmlp_nano_rw_224</code> - 82.0 @ 224, 82.8 @ 320 (T)</li> <li><code>coatnet_0_rw_224</code> - 82.4 (T) — NOTE timm ‘0’ coatnets have 2 more 3rd stage blocks</li> <li><code>coatnet_bn_0_rw_224</code> - 82.4 (T)</li> <li><code>maxvit_nano_rw_256</code> - 82.9 @ 256 (T)</li> <li><code>coatnet_rmlp_1_rw_224</code> - 83.4 @ 224, 84 @ 320 (T)</li> <li><code>coatnet_1_rw_224</code> - 83.6 @ 224 (G)</li> <li>(T) = TPU trained with <code>bits_and_tpu</code> branch training code, (G) = GPU trained</li></ul></li> <li>GCVit (weights adapted from <a href="https://github.com/NVlabs/GCVit" rel="nofollow">https://github.com/NVlabs/GCVit</a>, code 100% <code>timm</code> re-write for license purposes)</li> <li>MViT-V2 (multi-scale vit, adapted from <a href="https://github.com/facebookresearch/mvit" rel="nofollow">https://github.com/facebookresearch/mvit</a>)</li> <li>EfficientFormer (adapted from <a href="https://github.com/snap-research/EfficientFormer" rel="nofollow">https://github.com/snap-research/EfficientFormer</a>)</li> <li>PyramidVisionTransformer-V2 (adapted from <a href="https://github.com/whai362/PVT" rel="nofollow">https://github.com/whai362/PVT</a>)</li> <li>‘Fast Norm’ support for LayerNorm and GroupNorm that avoids float32 upcast w/ AMP (uses APEX LN 
if available for further boost)</li></ul> <h3 class="relative group"><a id="aug-15-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-15-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 15, 2022</span></h3> <ul data-svelte-h="svelte-19s36kx"><li>ConvNeXt atto weights added<ul><li><code>convnext_atto</code> - 75.7 @ 224, 77.0 @ 288</li> <li><code>convnext_atto_ols</code> - 75.9 @ 224, 77.2 @ 288</li></ul></li></ul> <h3 class="relative group"><a id="aug-5-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-5-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 5, 2022</span></h3> <ul data-svelte-h="svelte-uqojig"><li>More custom ConvNeXt smaller model defs with weights<ul><li><code>convnext_femto</code> - 77.5 @ 224, 78.7 @ 288</li> <li><code>convnext_femto_ols</code> - 77.9 @ 224, 78.9 @ 288</li> <li><code>convnext_pico</code> - 79.5 @ 224, 80.4 @ 288</li> <li><code>convnext_pico_ols</code> - 79.5 @ 224, 80.5 @ 288</li> <li><code>convnext_nano_ols</code> - 80.9 @ 224, 81.6 @ 288</li></ul></li> <li>Updated EdgeNeXt to improve ONNX export, add new base variant and weights from original (<a href="https://github.com/mmaaz60/EdgeNeXt" rel="nofollow">https://github.com/mmaaz60/EdgeNeXt</a>)</li></ul> <h3 class="relative group"><a id="july-28-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-28-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 28, 2022</span></h3> <ul 
data-svelte-h="svelte-pdc04g"><li>Add freshly minted DeiT-III Medium (width=512, depth=12, num_heads=8) model weights. Thanks <a href="https://github.com/TouvronHugo" rel="nofollow">Hugo Touvron</a>!</li></ul> <h3 class="relative group"><a id="july-27-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-27-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 27, 2022</span></h3> <ul data-svelte-h="svelte-psmz9t"><li>All runtime benchmark and validation result csv files are finally up-to-date!</li> <li>A few more weights & model defs added:<ul><li><code>darknetaa53</code> - 79.8 @ 256, 80.5 @ 288</li> <li><code>convnext_nano</code> - 80.8 @ 224, 81.5 @ 288</li> <li><code>cs3sedarknet_l</code> - 81.2 @ 256, 81.8 @ 288</li> <li><code>cs3darknet_x</code> - 81.8 @ 256, 82.2 @ 288</li> <li><code>cs3sedarknet_x</code> - 82.2 @ 256, 82.7 @ 288</li> <li><code>cs3edgenet_x</code> - 82.2 @ 256, 82.7 @ 288</li> <li><code>cs3se_edgenet_x</code> - 82.8 @ 256, 83.5 @ 320</li></ul></li> <li><code>cs3</code> weights above all trained on TPU w/ <code>bits_and_tpu</code> branch. 
Thanks to TRC program!</li> <li>Add output_stride=8 and 16 support to ConvNeXt (dilation)</li> <li>deit3 models not being able to resize pos_emb fixed</li> <li>Version 0.6.7 PyPi release (w/ above bug fixes and new weights since 0.6.5)</li></ul> <h3 class="relative group"><a id="july-8-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-8-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 8, 2022</span></h3> <p data-svelte-h="svelte-14yj5kr">More models, more fixes</p> <ul data-svelte-h="svelte-1ocq7qf"><li>Official research models (w/ weights) added:<ul><li>EdgeNeXt from (<a href="https://github.com/mmaaz60/EdgeNeXt" rel="nofollow">https://github.com/mmaaz60/EdgeNeXt</a>)</li> <li>MobileViT-V2 from (<a href="https://github.com/apple/ml-cvnets" rel="nofollow">https://github.com/apple/ml-cvnets</a>)</li> <li>DeiT III (Revenge of the ViT) from (<a href="https://github.com/facebookresearch/deit" rel="nofollow">https://github.com/facebookresearch/deit</a>)</li></ul></li> <li>My own models:<ul><li>Small <code>ResNet</code> defs added by request with 1 block repeats for both basic and bottleneck (resnet10 and 
resnet14)</li> <li><code>CspNet</code> refactored with dataclass config, simplified CrossStage3 (<code>cs3</code>) option. These are closer to YOLO-v5+ backbone defs.</li> <li>More relative position vit fiddling. Two <code>srelpos</code> (shared relative position) models trained, and a medium w/ class token.</li> <li>Add an alternate downsample mode to EdgeNeXt and train a <code>small</code> model. Better than original small, but not their new USI trained weights.</li></ul></li> <li>My own model weight results (all ImageNet-1k training)<ul><li><code>resnet10t</code> - 66.5 @ 176, 68.3 @ 224</li> <li><code>resnet14t</code> - 71.3 @ 176, 72.3 @ 224</li> <li><code>resnetaa50</code> - 80.6 @ 224, 81.6 @ 288</li> <li><code>darknet53</code> - 80.0 @ 256, 80.5 @ 288</li> <li><code>cs3darknet_m</code> - 77.0 @ 256, 77.6 @ 288</li> <li><code>cs3darknet_focus_m</code> - 76.7 @ 256, 77.3 @ 288</li> <li><code>cs3darknet_l</code> - 80.4 @ 256, 80.9 @ 288</li> <li><code>cs3darknet_focus_l</code> - 80.3 @ 256, 80.9 @ 288</li> <li><code>vit_srelpos_small_patch16_224</code> - 81.1 @ 224, 82.1 @ 320</li> <li><code>vit_srelpos_medium_patch16_224</code> - 82.3 @ 224, 83.1 @ 320</li> <li><code>vit_relpos_small_patch16_cls_224</code> - 82.6 @ 224, 83.6 @ 320</li> <li><code>edgenext_small_rw</code> - 79.6 @ 224, 80.4 @ 320</li></ul></li> <li><code>cs3</code>, <code>darknet</code>, and <code>vit_relpos</code> weights above all trained on TPU thanks to TRC program! Rest trained on overheating GPUs.</li> <li>Hugging Face Hub support fixes verified, demo notebook TBA</li> <li>Pretrained weights / configs can be loaded externally (ie from local disk) w/ support for head adaptation.</li> <li>Add support to change image extensions scanned by <code>timm</code> datasets/readers. 
See (<a href="https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103" rel="nofollow">https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103</a>)</li> <li>Default ConvNeXt LayerNorm impl to use <code>F.layer_norm(x.permute(0, 2, 3, 1), ...).permute(0, 3, 1, 2)</code> via <code>LayerNorm2d</code> in all cases.<ul><li>a bit slower than previous custom impl on some hardware (ie Ampere w/ CL), but overall fewer regressions across wider HW / PyTorch version ranges.</li> <li>previous impl exists as <code>LayerNormExp2d</code> in <code>models/layers/norm.py</code></li></ul></li> <li>Numerous bug fixes</li> <li>Currently testing for imminent PyPi 0.6.x release</li> <li>LeViT pretraining of larger models still a WIP, they don’t train well / easily without distillation. Time to add distill support (finally)?</li> <li>ImageNet-22k weight training + finetune ongoing, work on multi-weight support (slowly) chugging along (there are a LOT of weights, sigh) …</li></ul> <h3 class="relative group"><a id="may-13-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-13-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>May 13, 2022</span></h3> <ul data-svelte-h="svelte-1ksbgxy"><li>Official Swin-V2 models and weights added from (<a href="https://github.com/microsoft/Swin-Transformer" rel="nofollow">https://github.com/microsoft/Swin-Transformer</a>). Cleaned up to support torchscript.</li> <li>Some refactoring for existing <code>timm</code> Swin-V2-CR impl, will likely do a bit more to bring parts closer to official and decide whether to merge some aspects.</li> <li>More Vision Transformer relative position / residual post-norm experiments (all trained on TPU thanks to TRC program)<ul><li><code>vit_relpos_small_patch16_224</code> - 81.5 @ 224, 82.5 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_relpos_medium_patch16_rpn_224</code> - 82.3 @ 224, 83.1 @ 320 — rel pos + res-post-norm, no class token, avg pool</li> <li><code>vit_relpos_medium_patch16_224</code> - 82.5 @ 224, 83.3 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_relpos_base_patch16_gapcls_224</code> - 82.8 @ 224, 83.9 @ 320 — rel pos, layer scale, class token, avg pool (by mistake)</li></ul></li> <li>Bring 512 dim, 8-head ‘medium’ ViT model variant back to life (after using in a pre DeiT ‘small’ model for first ViT impl back in 2020)</li> <li>Add ViT relative position support for switching btw existing impl and some additions in official Swin-V2 impl for future trials</li> <li>Sequencer2D impl (<a href="https://arxiv.org/abs/2205.01972" rel="nofollow">https://arxiv.org/abs/2205.01972</a>), added via PR from author (<a href="https://github.com/okojoalg" rel="nofollow">https://github.com/okojoalg</a>)</li></ul> <h3 class="relative group"><a id="may-2-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-2-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 2, 2022</span></h3> <ul data-svelte-h="svelte-1phzket"><li>Vision Transformer experiments adding Relative Position (Swin-V2 log-coord) (<code>vision_transformer_relpos.py</code>) and Residual Post-Norm branches (from Swin-V2) (<code>vision_transformer.py</code>)<ul><li><code>vit_relpos_base_patch32_plus_rpn_256</code> - 79.5 @ 256, 80.6 @ 320 — rel pos + extended width + res-post-norm, no class token, avg pool</li> <li><code>vit_relpos_base_patch16_224</code> - 82.5 @ 224, 83.6 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_base_patch16_rpn_224</code> - 82.3 @ 224 — rel pos + res-post-norm, no class token, avg pool</li></ul></li> <li>Vision Transformer refactor to remove representation layer that was only used in initial vit and rarely used since with newer pretrain (ie <code>How to Train Your ViT</code>)</li> <li><code>vit_</code> models support removal of class token, use of global average pool, use of fc_norm (ala beit, mae).</li></ul> <h3 class="relative group"><a id="april-22-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-22-2022"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 22, 2022</span></h3> <ul data-svelte-h="svelte-1whjijd"><li><code>timm</code> models are now officially supported in <a href="https://www.fast.ai/" rel="nofollow">fast.ai</a>! Just in time for the new Practical Deep Learning course. <code>timmdocs</code> documentation link updated to <a href="http://timm.fast.ai/" rel="nofollow">timm.fast.ai</a>.</li> <li>Two more model weights added in the TPU trained <a href="https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights" rel="nofollow">series</a>. 
Some In22k pretrain still in progress.<ul><li><code>seresnext101d_32x8d</code> - 83.69 @ 224, 84.35 @ 288</li> <li><code>seresnextaa101d_32x8d</code> (anti-aliased w/ AvgPool2d) - 83.85 @ 224, 84.57 @ 288</li></ul></li></ul> <h3 class="relative group"><a id="march-23-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-23-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 23, 2022</span></h3> <ul data-svelte-h="svelte-1dzgmu6"><li>Add <code>ParallelBlock</code> and <code>LayerScale</code> option to base vit models to support model configs in <a href="https://arxiv.org/abs/2203.09795" rel="nofollow">Three things everyone should know about ViT</a></li> <li><code>convnext_tiny_hnf</code> (head norm first) weights trained with (close to) A2 recipe, 82.2% top-1, could do better with more epochs.</li></ul> <h3 class="relative group"><a id="march-21-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-21-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 21, 2022</span></h3> <ul data-svelte-h="svelte-abrsee"><li>Merge <code>norm_norm_norm</code>. <strong>IMPORTANT</strong> this update for a coming 0.6.x release will likely de-stabilize the master branch for a while. Branch <a href="https://github.com/rwightman/pytorch-image-models/tree/0.5.x" rel="nofollow"><code>0.5.x</code></a> or a previous 0.5.x release can be used if stability is required.</li> <li>Significant weights update (all TPU trained) as described in this <a href="https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights" rel="nofollow">release</a><ul><li><code>regnety_040</code> - 82.3 @ 224, 82.96 @ 288</li> <li><code>regnety_064</code> - 83.0 @ 224, 83.65 @ 288</li> <li><code>regnety_080</code> - 83.17 @ 224, 83.86 @ 288</li> <li><code>regnetv_040</code> - 82.44 @ 224, 83.18 @ 288 (timm pre-act)</li> <li><code>regnetv_064</code> - 83.1 @ 224, 83.71 @ 288 (timm pre-act)</li> <li><code>regnetz_040</code> - 83.67 @ 256, 84.25 @ 320</li> <li><code>regnetz_040h</code> - 83.77 @ 256, 84.5 @ 320 (w/ extra fc in head)</li> <li><code>resnetv2_50d_gn</code> - 80.8 @ 224, 81.96 @ 288 (pre-act GroupNorm)</li> <li><code>resnetv2_50d_evos</code> - 80.77 @ 224, 82.04 @ 288 (pre-act EvoNormS)</li> <li><code>regnetz_c16_evos</code> - 
81.9 @ 256, 82.64 @ 320 (EvoNormS)</li> <li><code>regnetz_d8_evos</code> - 83.42 @ 256, 84.04 @ 320 (EvoNormS)</li> <li><code>xception41p</code> - 82 @ 299 (timm pre-act)</li> <li><code>xception65</code> - 83.17 @ 299</li> <li><code>xception65p</code> - 83.14 @ 299 (timm pre-act)</li> <li><code>resnext101_64x4d</code> - 82.46 @ 224, 83.16 @ 288</li> <li><code>seresnext101_32x8d</code> - 83.57 @ 224, 84.270 @ 288</li> <li><code>resnetrs200</code> - 83.85 @ 256, 84.44 @ 320</li></ul></li> <li>HuggingFace hub support fixed w/ initial groundwork for allowing alternative ‘config sources’ for pretrained model definitions and weights (generic local file / remote url support soon)</li> <li>SwinTransformer-V2 implementation added. Submitted by <a href="https://github.com/ChristophReich1996" rel="nofollow">Christoph Reich</a>. Training experiments and model changes by myself are ongoing so expect compat breaks.</li> <li>Swin-S3 (AutoFormerV2) models / weights added from <a href="https://github.com/microsoft/Cream/tree/main/AutoFormerV2" rel="nofollow">https://github.com/microsoft/Cream/tree/main/AutoFormerV2</a></li> <li>MobileViT models w/ weights adapted from <a href="https://github.com/apple/ml-cvnets" rel="nofollow">https://github.com/apple/ml-cvnets</a></li> <li>PoolFormer models w/ weights adapted from <a href="https://github.com/sail-sg/poolformer" rel="nofollow">https://github.com/sail-sg/poolformer</a></li> <li>VOLO models w/ weights adapted from <a href="https://github.com/sail-sg/volo" rel="nofollow">https://github.com/sail-sg/volo</a></li> <li>Significant work experimenting with non-BatchNorm norm layers such as EvoNorm, FilterResponseNorm, GroupNorm, etc</li> <li>Enhance support for alternate norm + act (‘NormAct’) layers added to a number of models, esp EfficientNet/MobileNetV3, RegNet, and aligned Xception</li> <li>Grouped conv support added to EfficientNet family</li> <li>Add ‘group matching’ API to all models to allow grouping model parameters for 
application of ‘layer-wise’ LR decay, lr scale added to LR scheduler</li> <li>Gradient checkpointing support added to many models</li> <li><code>forward_head(x, pre_logits=False)</code> fn added to all models to allow separate calls of <code>forward_features</code> + <code>forward_head</code></li> <li>All vision transformer and vision MLP models updated to return non-pooled / non-token selected features from <code>forward_features</code>, for consistency with CNN models, token selection or pooling now applied in <code>forward_head</code></li></ul> <h3 class="relative group"><a id="feb-2-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-2-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 2, 2022</span></h3> <ul data-svelte-h="svelte-rnuh6y"><li><a href="https://github.com/Chris-hughes10" rel="nofollow">Chris Hughes</a> posted an exhaustive run through of <code>timm</code> on his blog yesterday. Well worth a read. 
<a href="https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055" rel="nofollow">Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide</a></li> <li>I’m currently prepping to merge the <code>norm_norm_norm</code> branch back to master (ver 0.6.x) in next week or so.<ul><li>The changes are more extensive than usual and may destabilize and break some model API use (aiming for full backwards compat). So, beware <code>pip install git+https://github.com/rwightman/pytorch-image-models</code> installs!</li> <li><code>0.5.x</code> releases and a <code>0.5.x</code> branch will remain stable with a cherry pick or two until dust clears. Recommend sticking to pypi install for a bit if you want stable.</li></ul></li></ul> <h3 class="relative group"><a id="jan-14-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-14-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 14, 2022</span></h3> <ul data-svelte-h="svelte-7l2ez"><li>Version 0.5.4 w/ release to be pushed to pypi. 
It’s been a while since last pypi update and riskier changes will be merged to main branch soon…</li> <li>Add ConvNeXt models w/ weights from official impl (<a href="https://github.com/facebookresearch/ConvNeXt" rel="nofollow">https://github.com/facebookresearch/ConvNeXt</a>), a few perf tweaks, compatible with timm features</li> <li>Tried training a few small (~1.8-3M param) / mobile optimized models, a few are good so far, more on the way…<ul><li><code>mnasnet_small</code> - 65.6 top-1</li> <li><code>mobilenetv2_050</code> - 65.9</li> <li><code>lcnet_100/075/050</code> - 72.1 / 68.8 / 63.1</li> <li><code>semnasnet_075</code> - 73</li> <li><code>fbnetv3_b/d/g</code> - 79.1 / 79.7 / 82.0</li></ul></li> <li>TinyNet models added by <a href="https://github.com/rsomani95" rel="nofollow">rsomani95</a></li> <li>LCNet added via MobileNetV3 architecture</li></ul> <h3 class="relative group"><a id="jan-5-2023" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-5-2023"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 5, 2023</span></h3> <ul data-svelte-h="svelte-jygzt1"><li>ConvNeXt-V2 models and weights added to existing 
<code>convnext.py</code><ul><li>Paper: <a href="http://arxiv.org/abs/2301.00808" rel="nofollow">ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders</a></li> <li>Reference impl: <a href="https://github.com/facebookresearch/ConvNeXt-V2" rel="nofollow">https://github.com/facebookresearch/ConvNeXt-V2</a> (NOTE: weights currently CC-BY-NC)</li></ul></li></ul> <h3 class="relative group"><a id="dec-23-2022-" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-23-2022-"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 23, 2022 🎄☃</span></h3> <ul data-svelte-h="svelte-hz5yxo"><li>Add FlexiViT models and weights from <a href="https://github.com/google-research/big_vision" rel="nofollow">https://github.com/google-research/big_vision</a> (check out paper at <a href="https://arxiv.org/abs/2212.08013" rel="nofollow">https://arxiv.org/abs/2212.08013</a>)<ul><li>NOTE currently resizing is static on model creation, on-the-fly dynamic / train patch size sampling is a WIP</li></ul></li> <li>Many more models updated to multi-weight and downloadable via HF hub now (convnext, efficientnet, mobilenet, vision_transformer, 
beit)</li> <li>More model pretrained tag and adjustments, some model names changed (working on deprecation translations, consider main branch DEV branch right now, use 0.6.x for stable use)</li> <li>More ImageNet-12k (subset of 22k) pretrain models popping up:<ul><li><code>efficientnet_b5.in12k_ft_in1k</code> - 85.9 @ 448x448</li> <li><code>vit_medium_patch16_gap_384.in12k_ft_in1k</code> - 85.5 @ 384x384</li> <li><code>vit_medium_patch16_gap_256.in12k_ft_in1k</code> - 84.5 @ 256x256</li> <li><code>convnext_nano.in12k_ft_in1k</code> - 82.9 @ 288x288</li></ul></li></ul> <h3 class="relative group"><a id="dec-8-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-8-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 8, 2022</span></h3> <ul data-svelte-h="svelte-1ybsdar"><li>Add ‘EVA l’ to <code>vision_transformer.py</code>, MAE style ViT-L/14 MIM pretrain w/ EVA-CLIP targets, FT on ImageNet-1k (w/ ImageNet-22k intermediate for some)<ul><li>original source: <a href="https://github.com/baaivision/EVA" rel="nofollow">https://github.com/baaivision/EVA</a></li></ul></li></ul> <table 
data-svelte-h="svelte-1kdm8f8"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">eva_large_patch14_336.in22k_ft_in22k_in1k</td> <td align="right">89.2</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_336.in22k_ft_in1k</td> <td align="right">88.7</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_196.in22k_ft_in22k_in1k</td> <td align="right">88.6</td> <td align="right">304.1</td> <td align="right">61.6</td> <td align="right">63.5</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_large_patch14_196.in22k_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.1</td> <td align="right">61.6</td> <td align="right">63.5</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="dec-6-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-6-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 
0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 6, 2022</span></h3> <ul data-svelte-h="svelte-1o52qz7"><li>Add ‘EVA g’, BEiT style ViT-g/14 model weights w/ both MIM pretrain and CLIP pretrain to <code>beit.py</code>. <ul><li>original source: <a href="https://github.com/baaivision/EVA" rel="nofollow">https://github.com/baaivision/EVA</a></li> <li>paper: <a href="https://arxiv.org/abs/2211.07636" rel="nofollow">https://arxiv.org/abs/2211.07636</a></li></ul></li></ul> <table data-svelte-h="svelte-1jlkusq"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">eva_giant_patch14_560.m30m_ft_in22k_in1k</td> <td align="right">89.8</td> <td align="right">1014.4</td> <td align="right">1906.8</td> <td align="right">2577.2</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_336.m30m_ft_in22k_in1k</td> <td align="right">89.6</td> <td align="right">1013</td> <td align="right">620.6</td> <td align="right">550.7</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_336.clip_ft_in1k</td> <td align="right">89.4</td> <td align="right">1013</td> <td align="right">620.6</td> <td align="right">550.7</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr> <tr><td align="left">eva_giant_patch14_224.clip_ft_in1k</td> <td align="right">89.1</td> <td align="right">1012.6</td> <td align="right">267.2</td> <td 
align="right">192.6</td> <td align="left"><a href="https://huggingface.co/BAAI/EVA" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="dec-5-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#dec-5-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Dec 5, 2022</span></h3> <ul data-svelte-h="svelte-1f4joci"><li>Pre-release (<code>0.8.0dev0</code>) of multi-weight support (<code>model_arch.pretrained_tag</code>). Install with <code>pip install --pre timm</code><ul><li>vision_transformer, maxvit, convnext are the first three model impl w/ support</li> <li>model names are changing with this (previous _21k, etc. 
fn will merge), still sorting out deprecation handling</li> <li>bugs are likely, but I need feedback so please try it out</li> <li>if stability is needed, please use 0.6.x pypi releases or clone from <a href="https://github.com/rwightman/pytorch-image-models/tree/0.6.x" rel="nofollow">0.6.x branch</a></li></ul></li> <li>Support for PyTorch 2.0 compile is added in train/validate/inference/benchmark, use <code>--torchcompile</code> argument</li> <li>Inference script allows more control over output, select k for top-class index + prob json, csv or parquet output</li> <li>Add a full set of fine-tuned CLIP image tower weights from both LAION-2B and original OpenAI CLIP models</li></ul> <table data-svelte-h="svelte-1w1dpjo"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k</td> <td align="right">88.6</td> <td align="right">632.5</td> <td align="right">391</td> <td align="right">407.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_336.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.openai_ft_in12k_in1k</td> <td align="right">88.3</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">88.2</td> <td align="right">632</td> <td align="right">167.4</td> <td align="right">139.4</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.laion2b_ft_in12k_in1k</td> <td 
align="right">88.2</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.openai_ft_in12k_in1k</td> <td align="right">88.2</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">87.9</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.openai_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.2</td> <td align="right">81.1</td> <td align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_336.laion2b_ft_in1k</td> <td align="right">87.9</td> <td align="right">304.5</td> <td align="right">191.1</td> <td align="right">270.2</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_336.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_huge_patch14_clip_224.laion2b_ft_in1k</td> <td align="right">87.6</td> <td align="right">632</td> <td align="right">167.4</td> <td align="right">139.4</td> <td align="left"><a href="https://huggingface.co/timm/vit_huge_patch14_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_large_patch14_clip_224.laion2b_ft_in1k</td> <td align="right">87.3</td> <td align="right">304.2</td> <td align="right">81.1</td> <td 
align="right">88.8</td> <td align="left"><a href="https://huggingface.co/timm/vit_large_patch14_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.laion2b_ft_in12k_in1k</td> <td align="right">87.2</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.openai_ft_in12k_in1k</td> <td align="right">87</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.laion2b_ft_in1k</td> <td align="right">86.6</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_384.openai_ft_in1k</td> <td align="right">86.2</td> <td align="right">86.9</td> <td align="right">55.5</td> <td align="right">101.6</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_384.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">86.2</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.openai_ft_in12k_in1k</td> <td align="right">85.9</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a 
href="https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_448.laion2b_ft_in12k_in1k</td> <td align="right">85.8</td> <td align="right">88.3</td> <td align="right">17.9</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_448.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.laion2b_ft_in1k</td> <td align="right">85.5</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_384.laion2b_ft_in12k_in1k</td> <td align="right">85.4</td> <td align="right">88.3</td> <td align="right">13.1</td> <td align="right">16.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_384.laion2b_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch16_clip_224.openai_ft_in1k</td> <td align="right">85.3</td> <td align="right">86.6</td> <td align="right">17.6</td> <td align="right">23.9</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch16_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_384.openai_ft_in12k_in1k</td> <td align="right">85.2</td> <td align="right">88.3</td> <td align="right">13.1</td> <td align="right">16.5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_384.openai_ft_in12k_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.laion2b_ft_in12k_in1k</td> <td align="right">83.3</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in12k_in1k" 
rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.laion2b_ft_in1k</td> <td align="right">82.6</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.laion2b_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">vit_base_patch32_clip_224.openai_ft_in1k</td> <td align="right">81.9</td> <td align="right">88.2</td> <td align="right">4.4</td> <td align="right">5</td> <td align="left"><a href="https://huggingface.co/timm/vit_base_patch32_clip_224.openai_ft_in1k" rel="nofollow">link</a></td></tr></tbody></table> <ul data-svelte-h="svelte-mr00b7"><li>Port of MaxViT Tensorflow Weights from official impl at <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a><ul><li>There were larger than expected drops for the upscaled 384/512 in21k fine-tune weights, possible detail missing, but the 21k FT did seem sensitive to small preprocessing</li></ul></li></ul> <table data-svelte-h="svelte-uhceid"><thead><tr><th align="left">model</th> <th align="right">top1</th> <th align="right">param_count</th> <th align="right">gmac</th> <th align="right">macts</th> <th align="left">hub</th></tr></thead> <tbody><tr><td align="left">maxvit_xlarge_tf_512.in21k_ft_in1k</td> <td align="right">88.5</td> <td align="right">475.8</td> <td align="right">534.1</td> <td align="right">1413.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_xlarge_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_xlarge_tf_384.in21k_ft_in1k</td> <td align="right">88.3</td> <td align="right">475.3</td> <td align="right">292.8</td> <td align="right">668.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_xlarge_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_512.in21k_ft_in1k</td> <td align="right">88.2</td> <td 
align="right">119.9</td> <td align="right">138</td> <td align="right">704</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_512.in21k_ft_in1k</td> <td align="right">88</td> <td align="right">212.3</td> <td align="right">244.8</td> <td align="right">942.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_512.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_384.in21k_ft_in1k</td> <td align="right">88</td> <td align="right">212</td> <td align="right">132.6</td> <td align="right">445.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_384.in21k_ft_in1k</td> <td align="right">87.9</td> <td align="right">119.6</td> <td align="right">73.8</td> <td align="right">332.9</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_384.in21k_ft_in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_512.in1k</td> <td align="right">86.6</td> <td align="right">119.9</td> <td align="right">138</td> <td align="right">704</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_512.in1k</td> <td align="right">86.5</td> <td align="right">212.3</td> <td align="right">244.8</td> <td align="right">942.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_384.in1k</td> <td align="right">86.3</td> <td align="right">119.6</td> <td align="right">73.8</td> <td align="right">332.9</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_384.in1k</td> <td align="right">86.2</td> <td 
align="right">212</td> <td align="right">132.6</td> <td align="right">445.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_512.in1k</td> <td align="right">86.1</td> <td align="right">69.1</td> <td align="right">67.3</td> <td align="right">383.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_512.in1k</td> <td align="right">85.7</td> <td align="right">31</td> <td align="right">33.5</td> <td align="right">257.6</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_512.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_384.in1k</td> <td align="right">85.5</td> <td align="right">69</td> <td align="right">35.9</td> <td align="right">183.6</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_384.in1k</td> <td align="right">85.1</td> <td align="right">31</td> <td align="right">17.5</td> <td align="right">123.4</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_384.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_large_tf_224.in1k</td> <td align="right">84.9</td> <td align="right">211.8</td> <td align="right">43.7</td> <td align="right">127.4</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_large_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_base_tf_224.in1k</td> <td align="right">84.9</td> <td align="right">119.5</td> <td align="right">24</td> <td align="right">95</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_base_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_small_tf_224.in1k</td> <td align="right">84.4</td> <td align="right">68.9</td> <td align="right">11.7</td> <td 
align="right">53.2</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_small_tf_224.in1k" rel="nofollow">link</a></td></tr> <tr><td align="left">maxvit_tiny_tf_224.in1k</td> <td align="right">83.4</td> <td align="right">30.9</td> <td align="right">5.6</td> <td align="right">35.8</td> <td align="left"><a href="https://huggingface.co/timm/maxvit_tiny_tf_224.in1k" rel="nofollow">link</a></td></tr></tbody></table> <h3 class="relative group"><a id="oct-15-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#oct-15-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Oct 15, 2022</span></h3> <ul data-svelte-h="svelte-a95su3"><li>Train and validation script enhancements</li> <li>Non-GPU (ie CPU) device support</li> <li>SLURM compatibility for train script</li> <li>HF datasets support (via ReaderHfds)</li> <li>TFDS/WDS dataloading improvements (sample padding/wrap for distributed use fixed wrt sample count estimate)</li> <li>in_chans !=3 support for scripts / loader</li> <li>Adan optimizer</li> <li>Can enable per-step LR scheduling via args</li> <li>Dataset ‘parsers’ renamed to ‘readers’, more descriptive of 
purpose</li> <li>AMP args changed, APEX via <code>--amp-impl apex</code>, bfloat16 supported via <code>--amp-dtype bfloat16</code></li> <li>main branch switched to 0.7.x version, 0.6x forked for stable release of weight only adds</li> <li>master -> main branch rename</li></ul> <h3 class="relative group"><a id="oct-10-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#oct-10-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Oct 10, 2022</span></h3> <ul data-svelte-h="svelte-w97o2d"><li>More weights in <code>maxxvit</code> series, incl first ConvNeXt block based <code>coatnext</code> and <code>maxxvit</code> experiments:<ul><li><code>coatnext_nano_rw_224</code> - 82.0 @ 224 (G) — (uses ConvNeXt conv block, no BatchNorm)</li> <li><code>maxxvit_rmlp_nano_rw_256</code> - 83.0 @ 256, 83.7 @ 320 (G) (uses ConvNeXt conv block, no BN)</li> <li><code>maxvit_rmlp_small_rw_224</code> - 84.5 @ 224, 85.1 @ 320 (G)</li> <li><code>maxxvit_rmlp_small_rw_256</code> - 84.6 @ 256, 84.9 @ 288 (G) — could be trained better, hparams need tuning (uses ConvNeXt block, no BN)</li> <li><code>coatnet_rmlp_2_rw_224</code> - 84.6 @ 224, 85 @ 320 
(T)</li> <li>NOTE: official MaxVit weights (in1k) have been released at <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a> — some extra work is needed to port and adapt since my impl was created independently of theirs and has a few small differences + the whole TF same padding fun.</li></ul></li></ul> <h3 class="relative group"><a id="sept-23-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sept-23-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sept 23, 2022</span></h3> <ul data-svelte-h="svelte-1dx7og"><li>LAION-2B CLIP image towers supported as pretrained backbones for fine-tune or features (no classifier)<ul><li>vit_base_patch32_224_clip_laion2b</li> <li>vit_large_patch14_224_clip_laion2b</li> <li>vit_huge_patch14_224_clip_laion2b</li> <li>vit_giant_patch14_224_clip_laion2b</li></ul></li></ul> <h3 class="relative group"><a id="sept-7-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#sept-7-2022"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Sept 7, 2022</span></h3> <ul data-svelte-h="svelte-8zju9s"><li>Hugging Face <a href="https://huggingface.co/docs/hub/timm" rel="nofollow"><code>timm</code> docs</a> home now exists, look for more here in the future</li> <li>Add BEiT-v2 weights for base and large 224x224 models from <a href="https://github.com/microsoft/unilm/tree/master/beit2" rel="nofollow">https://github.com/microsoft/unilm/tree/master/beit2</a></li> <li>Add more weights in <code>maxxvit</code> series incl a <code>pico</code> (7.5M params, 1.9 GMACs), two <code>tiny</code> variants:<ul><li><code>maxvit_rmlp_pico_rw_256</code> - 80.5 @ 256, 81.3 @ 320 (T)</li> <li><code>maxvit_tiny_rw_224</code> - 83.5 @ 224 (G)</li> <li><code>maxvit_rmlp_tiny_rw_256</code> - 84.2 @ 256, 84.8 @ 320 (T)</li></ul></li></ul> <h3 class="relative group"><a id="aug-29-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-29-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path 
d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 29, 2022</span></h3> <ul data-svelte-h="svelte-e4xo5p"><li>MaxVit window size scales with img_size by default. Add new RelPosMlp MaxViT weight that leverages this:<ul><li><code>maxvit_rmlp_nano_rw_256</code> - 83.0 @ 256, 83.6 @ 320 (T)</li></ul></li></ul> <h3 class="relative group"><a id="aug-26-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-26-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 26, 2022</span></h3> <ul data-svelte-h="svelte-16sk34w"><li>CoAtNet (<a href="https://arxiv.org/abs/2106.04803" rel="nofollow">https://arxiv.org/abs/2106.04803</a>) 
and MaxVit (<a href="https://arxiv.org/abs/2204.01697" rel="nofollow">https://arxiv.org/abs/2204.01697</a>) <code>timm</code> original models<ul><li>both found in <a href="https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/maxxvit.py" rel="nofollow"><code>maxxvit.py</code></a> model def, contains numerous experiments outside scope of original papers</li> <li>an unfinished Tensorflow version from MaxVit authors can be found <a href="https://github.com/google-research/maxvit" rel="nofollow">https://github.com/google-research/maxvit</a></li></ul></li> <li>Initial CoAtNet and MaxVit timm pretrained weights (working on more):<ul><li><code>coatnet_nano_rw_224</code> - 81.7 @ 224 (T)</li> <li><code>coatnet_rmlp_nano_rw_224</code> - 82.0 @ 224, 82.8 @ 320 (T)</li> <li><code>coatnet_0_rw_224</code> - 82.4 (T) — NOTE timm ‘0’ coatnets have 2 more 3rd stage blocks</li> <li><code>coatnet_bn_0_rw_224</code> - 82.4 (T)</li> <li><code>maxvit_nano_rw_256</code> - 82.9 @ 256 (T)</li> <li><code>coatnet_rmlp_1_rw_224</code> - 83.4 @ 224, 84 @ 320 (T)</li> <li><code>coatnet_1_rw_224</code> - 83.6 @ 224 (G)</li> <li>(T) = TPU trained with <code>bits_and_tpu</code> branch training code, (G) = GPU trained</li></ul></li> <li>GCVit (weights adapted from <a href="https://github.com/NVlabs/GCVit" rel="nofollow">https://github.com/NVlabs/GCVit</a>, code 100% <code>timm</code> re-write for license purposes)</li> <li>MViT-V2 (multi-scale vit, adapted from <a href="https://github.com/facebookresearch/mvit" rel="nofollow">https://github.com/facebookresearch/mvit</a>)</li> <li>EfficientFormer (adapted from <a href="https://github.com/snap-research/EfficientFormer" rel="nofollow">https://github.com/snap-research/EfficientFormer</a>)</li> <li>PyramidVisionTransformer-V2 (adapted from <a href="https://github.com/whai362/PVT" rel="nofollow">https://github.com/whai362/PVT</a>)</li> <li>‘Fast Norm’ support for LayerNorm and GroupNorm that avoids float32 upcast w/ AMP (uses APEX LN 
if available for further boost)</li></ul> <h3 class="relative group"><a id="aug-15-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-15-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 15, 2022</span></h3> <ul data-svelte-h="svelte-19s36kx"><li>ConvNeXt atto weights added<ul><li><code>convnext_atto</code> - 75.7 @ 224, 77.0 @ 288</li> <li><code>convnext_atto_ols</code> - 75.9 @ 224, 77.2 @ 288</li></ul></li></ul> <h3 class="relative group"><a id="aug-5-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#aug-5-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 
79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Aug 5, 2022</span></h3> <ul data-svelte-h="svelte-1v6eoga"><li>More custom ConvNeXt smaller model defs with weights <ul><li><code>convnext_femto</code> - 77.5 @ 224, 78.7 @ 288</li> <li><code>convnext_femto_ols</code> - 77.9 @ 224, 78.9 @ 288</li> <li><code>convnext_pico</code> - 79.5 @ 224, 80.4 @ 288</li> <li><code>convnext_pico_ols</code> - 79.5 @ 224, 80.5 @ 288</li> <li><code>convnext_nano_ols</code> - 80.9 @ 224, 81.6 @ 288</li></ul></li> <li>Updated EdgeNeXt to improve ONNX export, add new base variant and weights from original (<a href="https://github.com/mmaaz60/EdgeNeXt" rel="nofollow">https://github.com/mmaaz60/EdgeNeXt</a>)</li></ul> <h3 class="relative group"><a id="july-28-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-28-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 28, 2022</span></h3> 
<ul data-svelte-h="svelte-pdc04g"><li>Add freshly minted DeiT-III Medium (width=512, depth=12, num_heads=8) model weights. Thanks <a href="https://github.com/TouvronHugo" rel="nofollow">Hugo Touvron</a>!</li></ul> <h3 class="relative group"><a id="july-27-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-27-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 27, 2022</span></h3> <ul data-svelte-h="svelte-15k3fgy"><li>All runtime benchmark and validation result csv files are up-to-date!</li> <li>A few more weights & model defs added:<ul><li><code>darknetaa53</code> - 79.8 @ 256, 80.5 @ 288</li> <li><code>convnext_nano</code> - 80.8 @ 224, 81.5 @ 288</li> <li><code>cs3sedarknet_l</code> - 81.2 @ 256, 81.8 @ 288</li> <li><code>cs3darknet_x</code> - 81.8 @ 256, 82.2 @ 288</li> <li><code>cs3sedarknet_x</code> - 82.2 @ 256, 82.7 @ 288</li> <li><code>cs3edgenet_x</code> - 82.2 @ 256, 82.7 @ 288</li> <li><code>cs3se_edgenet_x</code> - 82.8 @ 256, 83.5 @ 320</li></ul></li> <li><code>cs3</code> weights above all trained on TPU w/ <code>bits_and_tpu</code> branch. 
Thanks to TRC program!</li> <li>Add output_stride=8 and 16 support to ConvNeXt (dilation)</li> <li>deit3 models not being able to resize pos_emb fixed</li> <li>Version 0.6.7 PyPi release (w/ above bug fixes and new weights since 0.6.5)</li></ul> <h3 class="relative group"><a id="july-8-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#july-8-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>July 8, 2022</span></h3> <p data-svelte-h="svelte-14yj5kr">More models, more fixes</p> <ul data-svelte-h="svelte-1stsmlj"><li>Official research models (w/ weights) added:<ul><li>EdgeNeXt from (<a href="https://github.com/mmaaz60/EdgeNeXt" rel="nofollow">https://github.com/mmaaz60/EdgeNeXt</a>)</li> <li>MobileViT-V2 from (<a href="https://github.com/apple/ml-cvnets" rel="nofollow">https://github.com/apple/ml-cvnets</a>)</li> <li>DeiT III (Revenge of the ViT) from (<a href="https://github.com/facebookresearch/deit" rel="nofollow">https://github.com/facebookresearch/deit</a>)</li></ul></li> <li>My own models:<ul><li>Small <code>ResNet</code> defs added by request with 1 block repeats for both basic and bottleneck (resnet10 and 
resnet14)</li> <li><code>CspNet</code> refactored with dataclass config, simplified CrossStage3 (<code>cs3</code>) option. These are closer to YOLO-v5+ backbone defs.</li> <li>More relative position vit fiddling. Two <code>srelpos</code> (shared relative position) models trained, and a medium w/ class token.</li> <li>Add an alternate downsample mode to EdgeNeXt and train a <code>small</code> model. Better than original small, but not their new USI trained weights.</li></ul></li> <li>My own model weight results (all ImageNet-1k training)<ul><li><code>resnet10t</code> - 66.5 @ 176, 68.3 @ 224</li> <li><code>resnet14t</code> - 71.3 @ 176, 72.3 @ 224</li> <li><code>resnetaa50</code> - 80.6 @ 224, 81.6 @ 288</li> <li><code>darknet53</code> - 80.0 @ 256, 80.5 @ 288</li> <li><code>cs3darknet_m</code> - 77.0 @ 256, 77.6 @ 288</li> <li><code>cs3darknet_focus_m</code> - 76.7 @ 256, 77.3 @ 288</li> <li><code>cs3darknet_l</code> - 80.4 @ 256, 80.9 @ 288</li> <li><code>cs3darknet_focus_l</code> - 80.3 @ 256, 80.9 @ 288</li> <li><code>vit_srelpos_small_patch16_224</code> - 81.1 @ 224, 82.1 @ 320</li> <li><code>vit_srelpos_medium_patch16_224</code> - 82.3 @ 224, 83.1 @ 320</li> <li><code>vit_relpos_small_patch16_cls_224</code> - 82.6 @ 224, 83.6 @ 320</li> <li><code>edgenext_small_rw</code> - 79.6 @ 224, 80.4 @ 320</li></ul></li> <li><code>cs3</code>, <code>darknet</code>, and <code>vit_relpos</code> weights above all trained on TPU thanks to TRC program! Rest trained on overheating GPUs.</li> <li>Hugging Face Hub support fixes verified, demo notebook TBA</li> <li>Pretrained weights / configs can be loaded externally (ie from local disk) w/ support for head adaptation.</li> <li>Add support to change image extensions scanned by <code>timm</code> datasets/parsers. 
See (<a href="https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103" rel="nofollow">https://github.com/rwightman/pytorch-image-models/pull/1274#issuecomment-1178303103</a>)</li> <li>Default ConvNeXt LayerNorm impl to use <code>F.layer_norm(x.permute(0, 2, 3, 1), ...).permute(0, 3, 1, 2)</code> via <code>LayerNorm2d</code> in all cases. <ul><li>a bit slower than previous custom impl on some hardware (ie Ampere w/ CL), but overall fewer regressions across wider HW / PyTorch version ranges.</li> <li>previous impl exists as <code>LayerNormExp2d</code> in <code>models/layers/norm.py</code></li></ul></li> <li>Numerous bug fixes</li> <li>Currently testing for imminent PyPi 0.6.x release</li> <li>LeViT pretraining of larger models still a WIP, they don’t train well / easily without distillation. Time to add distill support (finally)?</li> <li>ImageNet-22k weight training + finetune ongoing, work on multi-weight support (slowly) chugging along (there are a LOT of weights, sigh) …</li></ul> <h3 class="relative group"><a id="may-13-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-13-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" 
fill="currentColor"></path></svg></span></a> <span>May 13, 2022</span></h3> <ul data-svelte-h="svelte-1ksbgxy"><li>Official Swin-V2 models and weights added from (<a href="https://github.com/microsoft/Swin-Transformer" rel="nofollow">https://github.com/microsoft/Swin-Transformer</a>). Cleaned up to support torchscript.</li> <li>Some refactoring for existing <code>timm</code> Swin-V2-CR impl, will likely do a bit more to bring parts closer to official and decide whether to merge some aspects.</li> <li>More Vision Transformer relative position / residual post-norm experiments (all trained on TPU thanks to TRC program)<ul><li><code>vit_relpos_small_patch16_224</code> - 81.5 @ 224, 82.5 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_relpos_medium_patch16_rpn_224</code> - 82.3 @ 224, 83.1 @ 320 — rel pos + res-post-norm, no class token, avg pool</li> <li><code>vit_relpos_medium_patch16_224</code> - 82.5 @ 224, 83.3 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_relpos_base_patch16_gapcls_224</code> - 82.8 @ 224, 83.9 @ 320 — rel pos, layer scale, class token, avg pool (by mistake)</li></ul></li> <li>Bring 512 dim, 8-head ‘medium’ ViT model variant back to life (after using in a pre DeiT ‘small’ model for first ViT impl back in 2020)</li> <li>Add ViT relative position support for switching btw existing impl and some additions in official Swin-V2 impl for future trials</li> <li>Sequencer2D impl (<a href="https://arxiv.org/abs/2205.01972" rel="nofollow">https://arxiv.org/abs/2205.01972</a>), added via PR from author (<a href="https://github.com/okojoalg" rel="nofollow">https://github.com/okojoalg</a>)</li></ul> <h3 class="relative group"><a id="may-2-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#may-2-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>May 2, 2022</span></h3> <ul data-svelte-h="svelte-1phzket"><li>Vision Transformer experiments adding Relative Position (Swin-V2 log-coord) (<code>vision_transformer_relpos.py</code>) and Residual Post-Norm branches (from Swin-V2) (<code>vision_transformer.py</code>)<ul><li><code>vit_relpos_base_patch32_plus_rpn_256</code> - 79.5 @ 256, 80.6 @ 320 — rel pos + extended width + res-post-norm, no class token, avg pool</li> <li><code>vit_relpos_base_patch16_224</code> - 82.5 @ 224, 83.6 @ 320 — rel pos, layer scale, no class token, avg pool</li> <li><code>vit_base_patch16_rpn_224</code> - 82.3 @ 224 — rel pos + res-post-norm, no class token, avg pool</li></ul></li> <li>Vision Transformer refactor to remove representation layer that was only used in initial vit and rarely used since with newer pretrain (ie <code>How to Train Your ViT</code>)</li> <li><code>vit_</code> models support removal of class token, use of global average pool, use of fc_norm (ala beit, mae).</li></ul> <h3 class="relative group"><a id="april-22-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#april-22-2022"><span><svg class="" 
xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>April 22, 2022</span></h3> <ul data-svelte-h="svelte-1whjijd"><li><code>timm</code> models are now officially supported in <a href="https://www.fast.ai/" rel="nofollow">fast.ai</a>! Just in time for the new Practical Deep Learning course. <code>timmdocs</code> documentation link updated to <a href="http://timm.fast.ai/" rel="nofollow">timm.fast.ai</a>.</li> <li>Two more model weights added in the TPU trained <a href="https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights" rel="nofollow">series</a>. 
Some In22k pretrain still in progress.<ul><li><code>seresnext101d_32x8d</code> - 83.69 @ 224, 84.35 @ 288</li> <li><code>seresnextaa101d_32x8d</code> (anti-aliased w/ AvgPool2d) - 83.85 @ 224, 84.57 @ 288</li></ul></li></ul> <h3 class="relative group"><a id="march-23-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-23-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 23, 2022</span></h3> <ul data-svelte-h="svelte-1dzgmu6"><li>Add <code>ParallelBlock</code> and <code>LayerScale</code> option to base vit models to support model configs in <a href="https://arxiv.org/abs/2203.09795" rel="nofollow">Three things everyone should know about ViT</a></li> <li><code>convnext_tiny_hnf</code> (head norm first) weights trained with (close to) A2 recipe, 82.2% top-1, could do better with more epochs.</li></ul> <h3 class="relative group"><a id="march-21-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#march-21-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" 
xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>March 21, 2022</span></h3> <ul data-svelte-h="svelte-abrsee"><li>Merge <code>norm_norm_norm</code>. <strong>IMPORTANT</strong> this update for a coming 0.6.x release will likely de-stabilize the master branch for a while. Branch <a href="https://github.com/rwightman/pytorch-image-models/tree/0.5.x" rel="nofollow"><code>0.5.x</code></a> or a previous 0.5.x release can be used if stability is required.</li> <li>Significant weights update (all TPU trained) as described in this <a href="https://github.com/rwightman/pytorch-image-models/releases/tag/v0.1-tpu-weights" rel="nofollow">release</a><ul><li><code>regnety_040</code> - 82.3 @ 224, 82.96 @ 288</li> <li><code>regnety_064</code> - 83.0 @ 224, 83.65 @ 288</li> <li><code>regnety_080</code> - 83.17 @ 224, 83.86 @ 288</li> <li><code>regnetv_040</code> - 82.44 @ 224, 83.18 @ 288 (timm pre-act)</li> <li><code>regnetv_064</code> - 83.1 @ 224, 83.71 @ 288 (timm pre-act)</li> <li><code>regnetz_040</code> - 83.67 @ 256, 84.25 @ 320</li> <li><code>regnetz_040h</code> - 83.77 @ 256, 84.5 @ 320 (w/ extra fc in head)</li> <li><code>resnetv2_50d_gn</code> - 80.8 @ 224, 81.96 @ 288 (pre-act GroupNorm)</li> <li><code>resnetv2_50d_evos</code> 80.77 @ 224, 82.04 @ 288 (pre-act EvoNormS)</li> <li><code>regnetz_c16_evos</code> - 
81.9 @ 256, 82.64 @ 320 (EvoNormS)</li> <li><code>regnetz_d8_evos</code> - 83.42 @ 256, 84.04 @ 320 (EvoNormS)</li> <li><code>xception41p</code> - 82 @ 299 (timm pre-act)</li> <li><code>xception65</code> - 83.17 @ 299</li> <li><code>xception65p</code> - 83.14 @ 299 (timm pre-act)</li> <li><code>resnext101_64x4d</code> - 82.46 @ 224, 83.16 @ 288</li> <li><code>seresnext101_32x8d</code> - 83.57 @ 224, 84.270 @ 288</li> <li><code>resnetrs200</code> - 83.85 @ 256, 84.44 @ 320</li></ul></li> <li>HuggingFace hub support fixed w/ initial groundwork for allowing alternative ‘config sources’ for pretrained model definitions and weights (generic local file / remote url support soon)</li> <li>SwinTransformer-V2 implementation added. Submitted by <a href="https://github.com/ChristophReich1996" rel="nofollow">Christoph Reich</a>. Training experiments and model changes by myself are ongoing so expect compat breaks.</li> <li>Swin-S3 (AutoFormerV2) models / weights added from <a href="https://github.com/microsoft/Cream/tree/main/AutoFormerV2" rel="nofollow">https://github.com/microsoft/Cream/tree/main/AutoFormerV2</a></li> <li>MobileViT models w/ weights adapted from <a href="https://github.com/apple/ml-cvnets" rel="nofollow">https://github.com/apple/ml-cvnets</a></li> <li>PoolFormer models w/ weights adapted from <a href="https://github.com/sail-sg/poolformer" rel="nofollow">https://github.com/sail-sg/poolformer</a></li> <li>VOLO models w/ weights adapted from <a href="https://github.com/sail-sg/volo" rel="nofollow">https://github.com/sail-sg/volo</a></li> <li>Significant work experimenting with non-BatchNorm norm layers such as EvoNorm, FilterResponseNorm, GroupNorm, etc</li> <li>Enhance support for alternate norm + act (‘NormAct’) layers added to a number of models, esp EfficientNet/MobileNetV3, RegNet, and aligned Xception</li> <li>Grouped conv support added to EfficientNet family</li> <li>Add ‘group matching’ API to all models to allow grouping model parameters for 
application of ‘layer-wise’ LR decay, lr scale added to LR scheduler</li> <li>Gradient checkpointing support added to many models</li> <li><code>forward_head(x, pre_logits=False)</code> fn added to all models to allow separate calls of <code>forward_features</code> + <code>forward_head</code></li> <li>All vision transformer and vision MLP models updated to return non-pooled / non-token selected features from <code>forward_features</code>, for consistency with CNN models, token selection or pooling now applied in <code>forward_head</code></li></ul> <h3 class="relative group"><a id="feb-2-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#feb-2-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Feb 2, 2022</span></h3> <ul data-svelte-h="svelte-rnuh6y"><li><a href="https://github.com/Chris-hughes10" rel="nofollow">Chris Hughes</a> posted an exhaustive run through of <code>timm</code> on his blog yesterday. Well worth a read. 
<a href="https://towardsdatascience.com/getting-started-with-pytorch-image-models-timm-a-practitioners-guide-4e77b4bf9055" rel="nofollow">Getting Started with PyTorch Image Models (timm): A Practitioner’s Guide</a></li> <li>I’m currently prepping to merge the <code>norm_norm_norm</code> branch back to master (ver 0.6.x) in next week or so.<ul><li>The changes are more extensive than usual and may destabilize and break some model API use (aiming for full backwards compat). So, beware <code>pip install git+https://github.com/rwightman/pytorch-image-models</code> installs!</li> <li><code>0.5.x</code> releases and a <code>0.5.x</code> branch will remain stable with a cherry pick or two until dust clears. Recommend sticking to pypi install for a bit if you want stable.</li></ul></li></ul> <h3 class="relative group"><a id="jan-14-2022" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#jan-14-2022"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Jan 14, 2022</span></h3> <ul data-svelte-h="svelte-7l2ez"><li>Version 0.5.4 w/ release to be pushed to pypi. 
It’s been a while since last pypi update and riskier changes will be merged to main branch soon…</li> <li>Add ConvNeXt models w/ weights from official impl (<a href="https://github.com/facebookresearch/ConvNeXt" rel="nofollow">https://github.com/facebookresearch/ConvNeXt</a>), a few perf tweaks, compatible with timm features</li> <li>Tried training a few small (~1.8-3M param) / mobile optimized models, a few are good so far, more on the way…<ul><li><code>mnasnet_small</code> - 65.6 top-1</li> <li><code>mobilenetv2_050</code> - 65.9</li> <li><code>lcnet_100/075/050</code> - 72.1 / 68.8 / 63.1</li> <li><code>semnasnet_075</code> - 73</li> <li><code>fbnetv3_b/d/g</code> - 79.1 / 79.7 / 82.0</li></ul></li> <li>TinyNet models added by <a href="https://github.com/rsomani95" rel="nofollow">rsomani95</a></li> <li>LCNet added via MobileNetV3 architecture</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/pytorch-image-models/blob/main/hfdocs/source/changes.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
		// Build-specific global holding the asset and base paths of this docs
		// build. It is assigned without a declaration, so it becomes a property
		// of the global object rather than a block-scoped binding.
		__sveltekit_1b6cdiz = {
			assets: "/docs/timm/pr_2349/en",
			base: "/docs/timm/pr_2349/en",
			env: {}
		};

		// Read synchronously, before any promise callback runs:
		// document.currentScript is only populated while this script executes.
		const mountTarget = document.currentScript.parentElement;

		// Two-slot payload handed to kit.start as `data`; both slots are null.
		const pageData = [null, null];

		// Options forwarded unchanged as the third argument of kit.start.
		const startOptions = {
			node_ids: [0, 2],
			data: pageData,
			form: null,
			error: null
		};

		// Both entry modules are requested up front so they load in parallel.
		const entryModules = [
			import("/docs/timm/pr_2349/en/_app/immutable/entry/start.f2138890.js"),
			import("/docs/timm/pr_2349/en/_app/immutable/entry/app.226c8755.js")
		];

		// Once both modules have resolved, hand control to the client entry
		// point (presumably SvelteKit's client bootstrap — confirm against the
		// imported start module).
		Promise.all(entryModules).then(function boot(loaded) {
			const kit = loaded[0];
			const app = loaded[1];
			kit.start(app, mountTarget, startOptions);
		});
	}
	</script>

Xet Storage Details

Size: 254 kB
Xet hash: 80ab4aff81f0854fa037c97927d91443fad1db95fb5825bf037a569a2bc91a01

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.