Buckets:

rtrm's picture
download
raw
12.8 kB
/*
 * Minified, machine-generated page module for the "Process image data" docs page.
 * It appears to be compiler output for a Svelte component (see the "svelte-…"
 * hashes and "data-svelte-h" attributes below) — do not hand-edit; presumably the
 * real source is the .mdx file referenced in the `E` component's `source` prop.
 *
 * Contents:
 *  - Xe(be): fragment factory. Declares one local per DOM node / child component,
 *    holds the page's static HTML and text in string locals, instantiates the child
 *    components (Ne, two headings via je, five code blocks via D, and Fe), and
 *    returns an object with these methods:
 *      c()    creates the elements and assigns their textContent / innerHTML;
 *      l(e)   builds the same nodes through the imported r/n/o helpers and only
 *             rewrites content when the i(...) value differs from the expected
 *             "svelte-…" hash (presumably the hydration/claim path — confirm);
 *      h()    sets the <meta name="hf:doc:metadata"> attributes and class "tip";
 *      m(e,t) appends the meta tag to document.head and inserts every node and
 *             child component in page order;
 *      p      the imported `Ae` (presumably a no-op update, since nothing here is
 *             reactive — confirm against the scheduler chunk);
 *      i/o    call f / h on each child component's fragment, guarded by `ge`;
 *      d(e)   detaches the nodes (only when `e` is truthy), always removes the meta
 *             tag, and calls u on each child component.
 *    The `code` props of the D components are base64 strings (they look like
 *    base64 of URL-encoded Python snippets — TODO confirm); `highlighted` holds the
 *    matching pre-highlighted HTML.
 *  - qe: JSON string with the page title and section outline, used as the meta
 *    tag's `content`.
 *  - We(be): registers a callback through `Ge` that reads the "fw" query parameter
 *    from window.location.search and discards the value; returns an empty array.
 *  - Oe: class extending `Ee` whose constructor calls Pe(this, d, We, Xe, Ze, {});
 *    exported as `component`.
 *
 * NOTE(review): as displayed here, a few single-quoted string literals (e.g. `Je`
 * and `He`) contain raw line breaks, which is not valid JavaScript. Presumably this
 * is an artifact of how the file was extracted — verify against the build output.
 */
import{s as Ze,n as Ae,o as Ge}from"../chunks/scheduler.d75c11ed.js";import{S as Ee,i as Pe,e as p,s as l,c as m,h as Qe,a as r,d as s,b as n,f as Re,g as o,j as i,k as ye,l as De,m as a,n as c,t as f,o as h,p as u}from"../chunks/index.4ec9dfe9.js";import{C as Ne,H as je,E as Fe}from"../chunks/MermaidChart.svelte_svelte_type_style_lang.ee0f129e.js";import{C as D}from"../chunks/CodeBlock.5919a092.js";function Xe(be){let d,N,P,F,g,X,y,q,j,we="This guide shows specific methods for processing image datasets. Learn how to:",W,b,Te='<li>Use <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a> with image dataset.</li> <li>Apply data augmentations to a dataset with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a>.</li>',V,w,_e='For a guide on how to process any type of dataset, take a look at the <a class="underline decoration-sky-400 decoration-2 font-semibold" href="./process">general process guide</a>.',Y,T,K,_,$e='The <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a> function can apply transforms over an entire dataset.',S,$,ve='For example, create a basic <a href="https://pytorch.org/vision/stable/generated/torchvision.transforms.Resize.html" rel="nofollow"><code>Resize</code></a> function:',O,v,ee,x,xe='Now use the <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a> function to resize the entire dataset, and set <code>batched=True</code> to speed up the process by accepting batches of examples. The transform returns <code>pixel_values</code> as a cacheable <code>PIL.Image</code> object:',te,J,se,U,Je='The cache file saves time because you don’t have to execute the same transform twice. 
The <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a> function is best for operations you only run once per training - like resizing an image - instead of using it for operations executed for each epoch, like data augmentations.',ae,C,Ue='<a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a> takes up some memory, but you can reduce its memory requirements with the following parameters:',le,k,Ce='<li><a href="./package_reference/main_classes#datasets.DatasetDict.map.batch_size"><code>batch_size</code></a> determines the number of examples that are processed in one call to the transform function.</li> <li><a href="./package_reference/main_classes#datasets.DatasetDict.map.writer_batch_size"><code>writer_batch_size</code></a> determines the number of processed examples that are kept in memory before they are stored away.</li>',ne,H,ke='Both parameter values default to 1000, which can be expensive if you are storing images. Lower these values to use less memory when you use <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.map">map()</a>.',pe,I,re,L,He='🤗 Datasets applies data augmentations from any library or package to your dataset. 
Transforms can be applied on-the-fly on batches of data with <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a>, which consumes less disk space.',ie,M,Ie='<p>The following example uses <a href="https://pytorch.org/vision/stable/index.html" rel="nofollow">torchvision</a>, but feel free to use other data augmentation libraries like <a href="https://albumentations.ai/docs/" rel="nofollow">Albumentations</a>, <a href="https://kornia.readthedocs.io/en/latest/" rel="nofollow">Kornia</a>, and <a href="https://imgaug.readthedocs.io/en/latest/" rel="nofollow">imgaug</a>.</p>',me,B,Le="For example, if you’d like to change the color properties of an image randomly:",oe,z,ce,R,Be="Create a function to apply the <code>ColorJitter</code> transform:",fe,Z,he,A,ze='Apply the transform with the <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a> function:',ue,G,de,E,Me,Q,ge;return g=new Ne({props:{containerStyle:"float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"}}),y=new je({props:{title:"Process image data",local:"process-image-data",headingTag:"h1"}}),T=new je({props:{title:"Map",local:"map",headingTag:"h2"}}),v=new D({props:{code:"ZGVmJTIwdHJhbnNmb3JtcyhleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1QmltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKS5yZXNpemUoKDEwMCUyQzEwMCkpJTIwZm9yJTIwaW1hZ2UlMjBpbiUyMGV4YW1wbGVzJTVCJTIyaW1hZ2UlMjIlNUQlNUQlMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxlcw==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = [image.convert(<span class="hljs-string">&quot;RGB&quot;</span>).resize((<span class="hljs-number">100</span>,<span class="hljs-number">100</span>)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`,wrap:!1}}),J=new D({props:{code:"ZGF0YXNldCUyMCUzRCUyMGRhdGFzZXQubWFwKHRyYW5zZm9ybXMlMkMlMjByZW1vdmVfY29sdW1ucyUzRCU1QiUyMmltYWdlJTIyJTVEJTJDJTIwYmF0Y2hlZCUzRFRydWUpJTBBZGF0YXNldCU1QjAlNUQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = dataset.<span class="hljs-built_in">map</span>(transforms, remove_columns=[<span class="hljs-string">&quot;image&quot;</span>], batched=<span class="hljs-literal">True</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-number">0</span>]
{<span class="hljs-string">&#x27;label&#x27;</span>: <span class="hljs-number">6</span>,
<span class="hljs-string">&#x27;pixel_values&#x27;</span>: &lt;PIL.PngImagePlugin.PngImageFile image mode=RGB size=100x100 at <span class="hljs-number">0x7F058237BB10</span>&gt;}`,wrap:!1}}),I=new je({props:{title:"Apply transforms",local:"apply-transforms",headingTag:"h2"}}),z=new D({props:{code:"ZnJvbSUyMHRvcmNodmlzaW9uLnRyYW5zZm9ybXMlMjBpbXBvcnQlMjBDb21wb3NlJTJDJTIwQ29sb3JKaXR0ZXIlMkMlMjBUb1RlbnNvciUwQSUwQWppdHRlciUyMCUzRCUyMENvbXBvc2UoJTBBJTIwJTIwJTIwJTIwJTVCJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwQ29sb3JKaXR0ZXIoYnJpZ2h0bmVzcyUzRDAuMjUlMkMlMjBjb250cmFzdCUzRDAuMjUlMkMlMjBzYXR1cmF0aW9uJTNEMC4yNSUyQyUyMGh1ZSUzRDAuNyklMkMlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBUb1RlbnNvcigpJTJDJTBBJTIwJTIwJTIwJTIwJTVEJTBBKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> Compose, ColorJitter, ToTensor
<span class="hljs-meta">&gt;&gt;&gt; </span>jitter = Compose(
<span class="hljs-meta">... </span> [
<span class="hljs-meta">... </span> ColorJitter(brightness=<span class="hljs-number">0.25</span>, contrast=<span class="hljs-number">0.25</span>, saturation=<span class="hljs-number">0.25</span>, hue=<span class="hljs-number">0.7</span>),
<span class="hljs-meta">... </span> ToTensor(),
<span class="hljs-meta">... </span> ]
<span class="hljs-meta">... </span>)`,wrap:!1}}),Z=new D({props:{code:"ZGVmJTIwdHJhbnNmb3JtcyhleGFtcGxlcyklM0ElMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMnBpeGVsX3ZhbHVlcyUyMiU1RCUyMCUzRCUyMCU1QmppdHRlcihpbWFnZS5jb252ZXJ0KCUyMlJHQiUyMikpJTIwZm9yJTIwaW1hZ2UlMjBpbiUyMGV4YW1wbGVzJTVCJTIyaW1hZ2UlMjIlNUQlNUQlMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxlcw==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = [jitter(image.convert(<span class="hljs-string">&quot;RGB&quot;</span>)) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`,wrap:!1}}),G=new D({props:{code:"ZGF0YXNldC5zZXRfdHJhbnNmb3JtKHRyYW5zZm9ybXMp",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset.set_transform(transforms)',wrap:!1}}),E=new Fe({props:{source:"https://github.com/huggingface/datasets/blob/main/docs/source/image_process.mdx"}}),{c(){d=p("meta"),N=l(),P=p("p"),F=l(),m(g.$$.fragment),X=l(),m(y.$$.fragment),q=l(),j=p("p"),j.textContent=we,W=l(),b=p("ul"),b.innerHTML=Te,V=l(),w=p("p"),w.innerHTML=_e,Y=l(),m(T.$$.fragment),K=l(),_=p("p"),_.innerHTML=$e,S=l(),$=p("p"),$.innerHTML=ve,O=l(),m(v.$$.fragment),ee=l(),x=p("p"),x.innerHTML=xe,te=l(),m(J.$$.fragment),se=l(),U=p("p"),U.innerHTML=Je,ae=l(),C=p("p"),C.innerHTML=Ue,le=l(),k=p("ul"),k.innerHTML=Ce,ne=l(),H=p("p"),H.innerHTML=ke,pe=l(),m(I.$$.fragment),re=l(),L=p("p"),L.innerHTML=He,ie=l(),M=p("blockquote"),M.innerHTML=Ie,me=l(),B=p("p"),B.textContent=Le,oe=l(),m(z.$$.fragment),ce=l(),R=p("p"),R.innerHTML=Be,fe=l(),m(Z.$$.fragment),he=l(),A=p("p"),A.innerHTML=ze,ue=l(),m(G.$$.fragment),de=l(),m(E.$$.fragment),Me=l(),Q=p("p"),this.h()},l(e){const 
t=Qe("svelte-u9bgzb",document.head);d=r(t,"META",{name:!0,content:!0}),t.forEach(s),N=n(e),P=r(e,"P",{}),Re(P).forEach(s),F=n(e),o(g.$$.fragment,e),X=n(e),o(y.$$.fragment,e),q=n(e),j=r(e,"P",{"data-svelte-h":!0}),i(j)!=="svelte-7uwdmt"&&(j.textContent=we),W=n(e),b=r(e,"UL",{"data-svelte-h":!0}),i(b)!=="svelte-i5lv9t"&&(b.innerHTML=Te),V=n(e),w=r(e,"P",{"data-svelte-h":!0}),i(w)!=="svelte-3s2bzp"&&(w.innerHTML=_e),Y=n(e),o(T.$$.fragment,e),K=n(e),_=r(e,"P",{"data-svelte-h":!0}),i(_)!=="svelte-2tbfj"&&(_.innerHTML=$e),S=n(e),$=r(e,"P",{"data-svelte-h":!0}),i($)!=="svelte-3s9pec"&&($.innerHTML=ve),O=n(e),o(v.$$.fragment,e),ee=n(e),x=r(e,"P",{"data-svelte-h":!0}),i(x)!=="svelte-gt88ab"&&(x.innerHTML=xe),te=n(e),o(J.$$.fragment,e),se=n(e),U=r(e,"P",{"data-svelte-h":!0}),i(U)!=="svelte-grsg6i"&&(U.innerHTML=Je),ae=n(e),C=r(e,"P",{"data-svelte-h":!0}),i(C)!=="svelte-1a9yanm"&&(C.innerHTML=Ue),le=n(e),k=r(e,"UL",{"data-svelte-h":!0}),i(k)!=="svelte-1uuk8jl"&&(k.innerHTML=Ce),ne=n(e),H=r(e,"P",{"data-svelte-h":!0}),i(H)!=="svelte-1wc9vn1"&&(H.innerHTML=ke),pe=n(e),o(I.$$.fragment,e),re=n(e),L=r(e,"P",{"data-svelte-h":!0}),i(L)!=="svelte-16b3vvc"&&(L.innerHTML=He),ie=n(e),M=r(e,"BLOCKQUOTE",{class:!0,"data-svelte-h":!0}),i(M)!=="svelte-1pz7lhk"&&(M.innerHTML=Ie),me=n(e),B=r(e,"P",{"data-svelte-h":!0}),i(B)!=="svelte-1dscypy"&&(B.textContent=Le),oe=n(e),o(z.$$.fragment,e),ce=n(e),R=r(e,"P",{"data-svelte-h":!0}),i(R)!=="svelte-1qyarbp"&&(R.innerHTML=Be),fe=n(e),o(Z.$$.fragment,e),he=n(e),A=r(e,"P",{"data-svelte-h":!0}),i(A)!=="svelte-16irt4n"&&(A.innerHTML=ze),ue=n(e),o(G.$$.fragment,e),de=n(e),o(E.$$.fragment,e),Me=n(e),Q=r(e,"P",{}),Re(Q).forEach(s),this.h()},h(){ye(d,"name","hf:doc:metadata"),ye(d,"content",qe),ye(M,"class","tip")},m(e,t){De(document.head,d),a(e,N,t),a(e,P,t),a(e,F,t),c(g,e,t),a(e,X,t),c(y,e,t),a(e,q,t),a(e,j,t),a(e,W,t),a(e,b,t),a(e,V,t),a(e,w,t),a(e,Y,t),c(T,e,t),a(e,K,t),a(e,_,t),a(e,S,t),a(e,$,t),a(e,O,t),c(v,e,t),a(e,ee,t),a(e,x,t),a(e,te,t),c(J,e,t),a(
e,se,t),a(e,U,t),a(e,ae,t),a(e,C,t),a(e,le,t),a(e,k,t),a(e,ne,t),a(e,H,t),a(e,pe,t),c(I,e,t),a(e,re,t),a(e,L,t),a(e,ie,t),a(e,M,t),a(e,me,t),a(e,B,t),a(e,oe,t),c(z,e,t),a(e,ce,t),a(e,R,t),a(e,fe,t),c(Z,e,t),a(e,he,t),a(e,A,t),a(e,ue,t),c(G,e,t),a(e,de,t),c(E,e,t),a(e,Me,t),a(e,Q,t),ge=!0},p:Ae,i(e){ge||(f(g.$$.fragment,e),f(y.$$.fragment,e),f(T.$$.fragment,e),f(v.$$.fragment,e),f(J.$$.fragment,e),f(I.$$.fragment,e),f(z.$$.fragment,e),f(Z.$$.fragment,e),f(G.$$.fragment,e),f(E.$$.fragment,e),ge=!0)},o(e){h(g.$$.fragment,e),h(y.$$.fragment,e),h(T.$$.fragment,e),h(v.$$.fragment,e),h(J.$$.fragment,e),h(I.$$.fragment,e),h(z.$$.fragment,e),h(Z.$$.fragment,e),h(G.$$.fragment,e),h(E.$$.fragment,e),ge=!1},d(e){e&&(s(N),s(P),s(F),s(X),s(q),s(j),s(W),s(b),s(V),s(w),s(Y),s(K),s(_),s(S),s($),s(O),s(ee),s(x),s(te),s(se),s(U),s(ae),s(C),s(le),s(k),s(ne),s(H),s(pe),s(re),s(L),s(ie),s(M),s(me),s(B),s(oe),s(ce),s(R),s(fe),s(he),s(A),s(ue),s(de),s(Me),s(Q)),s(d),u(g,e),u(y,e),u(T,e),u(v,e),u(J,e),u(I,e),u(z,e),u(Z,e),u(G,e),u(E,e)}}}const qe='{"title":"Process image data","local":"process-image-data","sections":[{"title":"Map","local":"map","sections":[],"depth":2},{"title":"Apply transforms","local":"apply-transforms","sections":[],"depth":2}],"depth":1}';function We(be){return Ge(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}class Oe extends Ee{constructor(d){super(),Pe(this,d,We,Xe,Ze,{})}}export{Oe as component};

Xet Storage Details

Size:
12.8 kB
·
Xet hash:
2ff2b3e53256c53d5843332d6b8ab188f074902762d6ca7b2c2e5bb4ed0f089a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.