Buckets:
/*
 * Generated, minified page module for the "Create a dataset for training" documentation page.
 * NOTE(review): the `data-svelte-h` attributes and `svelte-…` ids suggest this is compiled Svelte
 * output; confirm against the build. Content fixes presumably belong in the upstream doc source,
 * not in this artifact.
 *
 * Definitions, in order of appearance:
 *   - Yt(z), Ct(z): each returns a fragment object (methods c, l, m, p, d) wrapping a single <p>
 *     whose innerHTML is a fixed tip string. They are passed as the `default` slot of the two
 *     `It` instances (imported from the Tip chunk) created inside Wt.
 *   - Wt(z): the main fragment. It instantiates headings (`ct`, from the Heading chunk), tips
 *     (`It`) and code samples (`q`, from the CodeBlock chunk), and creates the <p>/<ul> elements
 *     whose HTML/text comes from the local string constants (Mt, ft, ht, gt, ...).
 *       c()  creates the elements and child fragments, then calls this.h().
 *       l(t) is the counterpart of c() that works from existing nodes in `t`; it only rewrites
 *            innerHTML/textContent when p(node) differs from the expected "svelte-…" id.
 *       h()  sets name="hf:doc:metadata" and content=Ft on the <meta> element `s`.
 *       m()  appends the <meta> to document.head and inserts every node/component in page order.
 *       p()  forwards `$$scope` to the two tip components when bit 2 of the dirty mask is set.
 *       i()/o() run M()/f() on every child fragment and toggle the `ut` flag.
 *       d()  removes the nodes (only when `t` is truthy, except the <meta>, which is always
 *            removed) and calls h() on every child component.
 *     Each `q` receives `code` (looks like base64 of a URI-encoded snippet; TODO confirm) and
 *     `highlighted` (pre-rendered HTML with hljs-* spans).
 *   - Ft: JSON string with the page title, slug and section outline used as the <meta> content.
 *   - zt(z): passes a callback to Bt (`o` from the scheduler chunk) that reads the `fw` query
 *     parameter from window.location.search and discards the result; returns an empty array.
 *   - Qt: class extending xt whose constructor calls vt(this, s, zt, Wt, Xt, {}); exported under
 *     the name `component`.
 *
 * NOTE(review): as captured here the text carries `|` table pipes at line starts/ends, and the
 * string literals Mt and Jt (plus several `highlighted` template literals) are split across
 * physical lines. A raw newline inside a quoted string is not valid JavaScript, so these are
 * presumably extraction artifacts; verify against the deployed file before relying on this copy.
 * NOTE(review): the link `<a href="hf.co/docs/datasets">` in Mt has no URL scheme, so a browser
 * would resolve it relative to the current page; likely a content bug to fix upstream.
 */
| import{s as Xt,o as Bt,n as Gt}from"../chunks/scheduler.182ea377.js";import{S as xt,i as vt,g as r,s as n,r as m,A as Rt,h as d,f as a,c as o,j as Ut,u,x as p,k as kt,y as Ht,a as l,v as c,d as M,t as f,w as h}from"../chunks/index.abf12888.js";import{T as It}from"../chunks/Tip.230e2334.js";import{C as q}from"../chunks/CodeBlock.57fe6e13.js";import{H as ct}from"../chunks/Heading.16916d63.js";function Yt(z){let s,g='💡 Learn more about how to create an image dataset for training in the <a href="https://huggingface.co/docs/datasets/image_dataset" rel="nofollow">Create an image dataset</a> guide.';return{c(){s=r("p"),s.innerHTML=g},l(i){s=d(i,"P",{"data-svelte-h":!0}),p(s)!=="svelte-105stmr"&&(s.innerHTML=g)},m(i,T){l(i,s,T)},p:Gt,d(i){i&&a(s)}}}function Ct(z){let s,g='💡 For more details and context about creating and uploading a dataset to the Hub, take a look at the <a href="https://huggingface.co/blog/image-search-datasets" rel="nofollow">Image search with 🤗 Datasets</a> post.';return{c(){s=r("p"),s.innerHTML=g},l(i){s=d(i,"P",{"data-svelte-h":!0}),p(s)!=="svelte-1wagfs8"&&(s.innerHTML=g)},m(i,T){l(i,s,T)},p:Gt,d(i){i&&a(s)}}}function Wt(z){let s,g,i,T,w,E,j,Mt='There are many datasets on the <a href="https://huggingface.co/datasets?task_categories=task_categories:text-to-image&sort=downloads" rel="nofollow">Hub</a> to train a model on, but if you can’t find one you’re interested in or want to use your own, you can create a dataset with the 🤗 <a href="hf.co/docs/datasets">Datasets</a> library. The dataset structure depends on the task you want to train your model on. The most basic dataset structure is a directory of images for tasks like unconditional image generation. 
Another dataset structure may be a directory of images and a text file containing their corresponding text captions for tasks like text-to-image generation.',L,$,ft="This guide will show you two ways to create a dataset to finetune on:",A,_,ht="<li>provide a folder of images to the <code>--train_data_dir</code> argument</li> <li>upload a dataset to the Hub and pass the dataset repository id to the <code>--dataset_name</code> argument</li>",Q,y,V,Z,S,b,gt='For unconditional generation, you can provide your own dataset as a folder of images. The training script uses the <a href="https://huggingface.co/docs/datasets/en/image_dataset#imagefolder" rel="nofollow"><code>ImageFolder</code></a> builder from 🤗 Datasets to automatically build a dataset from the folder. Your directory structure should look like:',P,U,D,k,Tt="Pass the path to the dataset directory to the <code>--train_data_dir</code> argument, and then you can start training:",K,I,O,G,tt,J,et,X,yt='Start by creating a dataset with the <a href="https://huggingface.co/docs/datasets/image_load#imagefolder" rel="nofollow"><code>ImageFolder</code></a> feature, which creates an <code>image</code> column containing the PIL-encoded images.',at,B,Jt="You can use the <code>data_dir</code> or <code>data_files</code> parameters to specify the location of the dataset. 
The <code>data_files</code> parameter supports mapping specific files to dataset splits like <code>train</code> or <code>test</code>:",lt,x,st,v,wt='Then use the <a href="https://huggingface.co/docs/datasets/v2.16.1/en/package_reference/main_classes#datasets.Dataset.push_to_hub" rel="nofollow">push_to_hub</a> method to upload the dataset to the Hub:',nt,R,ot,H,jt="Now the dataset is available for training by passing the dataset name to the <code>--dataset_name</code> argument:",it,Y,rt,C,dt,W,$t="Now that you’ve created a dataset, you can plug it into the <code>train_data_dir</code> (if your dataset is local) or <code>dataset_name</code> (if your dataset is on the Hub) arguments of a training script.",pt,F,_t='For your next steps, feel free to try and use your dataset to train a model for <a href="unconditional_training">unconditional generation</a> or <a href="text2image">text-to-image generation</a>!',mt,N,ut;return w=new ct({props:{title:"Create a dataset for training",local:"create-a-dataset-for-training",headingTag:"h1"}}),y=new It({props:{$$slots:{default:[Yt]},$$scope:{ctx:z}}}),Z=new ct({props:{title:"Provide a dataset as a folder",local:"provide-a-dataset-as-a-folder",headingTag:"h2"}}),U=new q({props:{code:"ZGF0YV9kaXIlMkZ4eHgucG5nJTBBZGF0YV9kaXIlMkZ4eHkucG5nJTBBZGF0YV9kaXIlMkYlNUIuLi4lNUQlMkZ4eHoucG5n",highlighted:`data_dir/xxx.png | |
| data_dir/xxy.png | |
| data_dir/[...]/xxz.png`,wrap:!1}}),I=new q({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMHRyYWluX3VuY29uZGl0aW9uYWwucHklMjAlNUMlMEElMjAlMjAlMjAlMjAtLXRyYWluX2RhdGFfZGlyJTIwJTNDcGF0aC10by10cmFpbi1kaXJlY3RvcnklM0UlMjAlNUMlMEElMjAlMjAlMjAlMjAlM0NvdGhlci1hcmd1bWVudHMlM0U=",highlighted:`accelerate launch train_unconditional.py \\ | |
| --train_data_dir <path-to-train-directory> \\ | |
| <other-arguments>`,wrap:!1}}),G=new ct({props:{title:"Upload your data to the Hub",local:"upload-your-data-to-the-hub",headingTag:"h2"}}),J=new It({props:{$$slots:{default:[Ct]},$$scope:{ctx:z}}}),x=new q({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBJTIzJTIwZXhhbXBsZSUyMDElM0ElMjBsb2NhbCUyMGZvbGRlciUwQWRhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyaW1hZ2Vmb2xkZXIlMjIlMkMlMjBkYXRhX2RpciUzRCUyMnBhdGhfdG9feW91cl9mb2xkZXIlMjIpJTBBJTBBJTIzJTIwZXhhbXBsZSUyMDIlM0ElMjBsb2NhbCUyMGZpbGVzJTIwKHN1cHBvcnRlZCUyMGZvcm1hdHMlMjBhcmUlMjB0YXIlMkMlMjBnemlwJTJDJTIwemlwJTJDJTIweHolMkMlMjByYXIlMkMlMjB6c3RkKSUwQWRhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTIyaW1hZ2Vmb2xkZXIlMjIlMkMlMjBkYXRhX2ZpbGVzJTNEJTIycGF0aF90b196aXBfZmlsZSUyMiklMEElMEElMjMlMjBleGFtcGxlJTIwMyUzQSUyMHJlbW90ZSUyMGZpbGVzJTIwKHN1cHBvcnRlZCUyMGZvcm1hdHMlMjBhcmUlMjB0YXIlMkMlMjBnemlwJTJDJTIwemlwJTJDJTIweHolMkMlMjByYXIlMkMlMjB6c3RkKSUwQWRhdGFzZXQlMjAlM0QlMjBsb2FkX2RhdGFzZXQoJTBBJTIwJTIwJTIwJTIwJTIyaW1hZ2Vmb2xkZXIlMjIlMkMlMEElMjAlMjAlMjAlMjBkYXRhX2ZpbGVzJTNEJTIyaHR0cHMlM0ElMkYlMkZkb3dubG9hZC5taWNyb3NvZnQuY29tJTJGZG93bmxvYWQlMkYzJTJGRSUyRjElMkYzRTFDM0YyMS1FQ0RCLTQ4NjktODM2OC02REVCQTc3QjkxOUYlMkZrYWdnbGVjYXRzYW5kZG9nc18zMzY3YS56aXAlMjIlMkMlMEEpJTBBJTBBJTIzJTIwZXhhbXBsZSUyMDQlM0ElMjBwcm92aWRpbmclMjBzZXZlcmFsJTIwc3BsaXRzJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMEElMjAlMjAlMjAlMjAlMjJpbWFnZWZvbGRlciUyMiUyQyUyMGRhdGFfZmlsZXMlM0QlN0IlMjJ0cmFpbiUyMiUzQSUyMCU1QiUyMnBhdGglMkZ0byUyRmZpbGUxJTIyJTJDJTIwJTIycGF0aCUyRnRvJTJGZmlsZTIlMjIlNUQlMkMlMjAlMjJ0ZXN0JTIyJTNBJTIwJTVCJTIycGF0aCUyRnRvJTJGZmlsZTMlMjIlMkMlMjAlMjJwYXRoJTJGdG8lMkZmaWxlNCUyMiU1RCU3RCUwQSk=",highlighted:`<span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-comment"># example 1: local folder</span> | |
| dataset = load_dataset(<span class="hljs-string">"imagefolder"</span>, data_dir=<span class="hljs-string">"path_to_your_folder"</span>) | |
| <span class="hljs-comment"># example 2: local files (supported formats are tar, gzip, zip, xz, rar, zstd)</span> | |
| dataset = load_dataset(<span class="hljs-string">"imagefolder"</span>, data_files=<span class="hljs-string">"path_to_zip_file"</span>) | |
| <span class="hljs-comment"># example 3: remote files (supported formats are tar, gzip, zip, xz, rar, zstd)</span> | |
| dataset = load_dataset( | |
| <span class="hljs-string">"imagefolder"</span>, | |
| data_files=<span class="hljs-string">"https://download.microsoft.com/download/3/E/1/3E1C3F21-ECDB-4869-8368-6DEBA77B919F/kagglecatsanddogs_3367a.zip"</span>, | |
| ) | |
| <span class="hljs-comment"># example 4: providing several splits</span> | |
| dataset = load_dataset( | |
| <span class="hljs-string">"imagefolder"</span>, data_files={<span class="hljs-string">"train"</span>: [<span class="hljs-string">"path/to/file1"</span>, <span class="hljs-string">"path/to/file2"</span>], <span class="hljs-string">"test"</span>: [<span class="hljs-string">"path/to/file3"</span>, <span class="hljs-string">"path/to/file4"</span>]} | |
| )`,wrap:!1}}),R=new q({props:{code:"JTIzJTIwYXNzdW1pbmclMjB5b3UlMjBoYXZlJTIwcmFuJTIwdGhlJTIwaHVnZ2luZ2ZhY2UtY2xpJTIwbG9naW4lMjBjb21tYW5kJTIwaW4lMjBhJTIwdGVybWluYWwlMEFkYXRhc2V0LnB1c2hfdG9faHViKCUyMm5hbWVfb2ZfeW91cl9kYXRhc2V0JTIyKSUwQSUwQSUyMyUyMGlmJTIweW91JTIwd2FudCUyMHRvJTIwcHVzaCUyMHRvJTIwYSUyMHByaXZhdGUlMjByZXBvJTJDJTIwc2ltcGx5JTIwcGFzcyUyMHByaXZhdGUlM0RUcnVlJTNBJTBBZGF0YXNldC5wdXNoX3RvX2h1YiglMjJuYW1lX29mX3lvdXJfZGF0YXNldCUyMiUyQyUyMHByaXZhdGUlM0RUcnVlKQ==",highlighted:`<span class="hljs-comment"># assuming you have ran the huggingface-cli login command in a terminal</span> | |
| dataset.push_to_hub(<span class="hljs-string">"name_of_your_dataset"</span>) | |
| <span class="hljs-comment"># if you want to push to a private repo, simply pass private=True:</span> | |
| dataset.push_to_hub(<span class="hljs-string">"name_of_your_dataset"</span>, private=<span class="hljs-literal">True</span>)`,wrap:!1}}),Y=new q({props:{code:"YWNjZWxlcmF0ZSUyMGxhdW5jaCUyMC0tbWl4ZWRfcHJlY2lzaW9uJTNEJTIyZnAxNiUyMiUyMCUyMHRyYWluX3RleHRfdG9faW1hZ2UucHklMjAlNUMlMEElMjAlMjAtLXByZXRyYWluZWRfbW9kZWxfbmFtZV9vcl9wYXRoJTNEJTIycnVud2F5bWwlMkZzdGFibGUtZGlmZnVzaW9uLXYxLTUlMjIlMjAlNUMlMEElMjAlMjAtLWRhdGFzZXRfbmFtZSUzRCUyMm5hbWVfb2ZfeW91cl9kYXRhc2V0JTIyJTIwJTVDJTBBJTIwJTIwJTNDb3RoZXItYXJndW1lbnRzJTNF",highlighted:`accelerate launch --mixed_precision=<span class="hljs-string">"fp16"</span> train_text_to_image.py \\ | |
| --pretrained_model_name_or_path=<span class="hljs-string">"runwayml/stable-diffusion-v1-5"</span> \\ | |
| --dataset_name=<span class="hljs-string">"name_of_your_dataset"</span> \\ | |
| <other-arguments>`,wrap:!1}}),C=new ct({props:{title:"Next steps",local:"next-steps",headingTag:"h2"}}),{c(){s=r("meta"),g=n(),i=r("p"),T=n(),m(w.$$.fragment),E=n(),j=r("p"),j.innerHTML=Mt,L=n(),$=r("p"),$.textContent=ft,A=n(),_=r("ul"),_.innerHTML=ht,Q=n(),m(y.$$.fragment),V=n(),m(Z.$$.fragment),S=n(),b=r("p"),b.innerHTML=gt,P=n(),m(U.$$.fragment),D=n(),k=r("p"),k.innerHTML=Tt,K=n(),m(I.$$.fragment),O=n(),m(G.$$.fragment),tt=n(),m(J.$$.fragment),et=n(),X=r("p"),X.innerHTML=yt,at=n(),B=r("p"),B.innerHTML=Jt,lt=n(),m(x.$$.fragment),st=n(),v=r("p"),v.innerHTML=wt,nt=n(),m(R.$$.fragment),ot=n(),H=r("p"),H.innerHTML=jt,it=n(),m(Y.$$.fragment),rt=n(),m(C.$$.fragment),dt=n(),W=r("p"),W.innerHTML=$t,pt=n(),F=r("p"),F.innerHTML=_t,mt=n(),N=r("p"),this.h()},l(t){const e=Rt("svelte-u9bgzb",document.head);s=d(e,"META",{name:!0,content:!0}),e.forEach(a),g=o(t),i=d(t,"P",{}),Ut(i).forEach(a),T=o(t),u(w.$$.fragment,t),E=o(t),j=d(t,"P",{"data-svelte-h":!0}),p(j)!=="svelte-x3y1lw"&&(j.innerHTML=Mt),L=o(t),$=d(t,"P",{"data-svelte-h":!0}),p($)!=="svelte-1ki6nhc"&&($.textContent=ft),A=o(t),_=d(t,"UL",{"data-svelte-h":!0}),p(_)!=="svelte-136e0w1"&&(_.innerHTML=ht),Q=o(t),u(y.$$.fragment,t),V=o(t),u(Z.$$.fragment,t),S=o(t),b=d(t,"P",{"data-svelte-h":!0}),p(b)!=="svelte-1tbm8ke"&&(b.innerHTML=gt),P=o(t),u(U.$$.fragment,t),D=o(t),k=d(t,"P",{"data-svelte-h":!0}),p(k)!=="svelte-16r2a6g"&&(k.innerHTML=Tt),K=o(t),u(I.$$.fragment,t),O=o(t),u(G.$$.fragment,t),tt=o(t),u(J.$$.fragment,t),et=o(t),X=d(t,"P",{"data-svelte-h":!0}),p(X)!=="svelte-82qx4b"&&(X.innerHTML=yt),at=o(t),B=d(t,"P",{"data-svelte-h":!0}),p(B)!=="svelte-10hf74w"&&(B.innerHTML=Jt),lt=o(t),u(x.$$.fragment,t),st=o(t),v=d(t,"P",{"data-svelte-h":!0}),p(v)!=="svelte-18c1kpf"&&(v.innerHTML=wt),nt=o(t),u(R.$$.fragment,t),ot=o(t),H=d(t,"P",{"data-svelte-h":!0}),p(H)!=="svelte-19t9o3p"&&(H.innerHTML=jt),it=o(t),u(Y.$$.fragment,t),rt=o(t),u(C.$$.fragment,t),dt=o(t),W=d(t,"P",{"data-svelte-h":!0}),p(W)!=="svelte-1ata9m5"&&(W.innerHTML=$t)
,pt=o(t),F=d(t,"P",{"data-svelte-h":!0}),p(F)!=="svelte-oyixgb"&&(F.innerHTML=_t),mt=o(t),N=d(t,"P",{}),Ut(N).forEach(a),this.h()},h(){kt(s,"name","hf:doc:metadata"),kt(s,"content",Ft)},m(t,e){Ht(document.head,s),l(t,g,e),l(t,i,e),l(t,T,e),c(w,t,e),l(t,E,e),l(t,j,e),l(t,L,e),l(t,$,e),l(t,A,e),l(t,_,e),l(t,Q,e),c(y,t,e),l(t,V,e),c(Z,t,e),l(t,S,e),l(t,b,e),l(t,P,e),c(U,t,e),l(t,D,e),l(t,k,e),l(t,K,e),c(I,t,e),l(t,O,e),c(G,t,e),l(t,tt,e),c(J,t,e),l(t,et,e),l(t,X,e),l(t,at,e),l(t,B,e),l(t,lt,e),c(x,t,e),l(t,st,e),l(t,v,e),l(t,nt,e),c(R,t,e),l(t,ot,e),l(t,H,e),l(t,it,e),c(Y,t,e),l(t,rt,e),c(C,t,e),l(t,dt,e),l(t,W,e),l(t,pt,e),l(t,F,e),l(t,mt,e),l(t,N,e),ut=!0},p(t,[e]){const Zt={};e&2&&(Zt.$$scope={dirty:e,ctx:t}),y.$set(Zt);const bt={};e&2&&(bt.$$scope={dirty:e,ctx:t}),J.$set(bt)},i(t){ut||(M(w.$$.fragment,t),M(y.$$.fragment,t),M(Z.$$.fragment,t),M(U.$$.fragment,t),M(I.$$.fragment,t),M(G.$$.fragment,t),M(J.$$.fragment,t),M(x.$$.fragment,t),M(R.$$.fragment,t),M(Y.$$.fragment,t),M(C.$$.fragment,t),ut=!0)},o(t){f(w.$$.fragment,t),f(y.$$.fragment,t),f(Z.$$.fragment,t),f(U.$$.fragment,t),f(I.$$.fragment,t),f(G.$$.fragment,t),f(J.$$.fragment,t),f(x.$$.fragment,t),f(R.$$.fragment,t),f(Y.$$.fragment,t),f(C.$$.fragment,t),ut=!1},d(t){t&&(a(g),a(i),a(T),a(E),a(j),a(L),a($),a(A),a(_),a(Q),a(V),a(S),a(b),a(P),a(D),a(k),a(K),a(O),a(tt),a(et),a(X),a(at),a(B),a(lt),a(st),a(v),a(nt),a(ot),a(H),a(it),a(rt),a(dt),a(W),a(pt),a(F),a(mt),a(N)),a(s),h(w,t),h(y,t),h(Z,t),h(U,t),h(I,t),h(G,t),h(J,t),h(x,t),h(R,t),h(Y,t),h(C,t)}}}const Ft='{"title":"Create a dataset for training","local":"create-a-dataset-for-training","sections":[{"title":"Provide a dataset as a folder","local":"provide-a-dataset-as-a-folder","sections":[],"depth":2},{"title":"Upload your data to the Hub","local":"upload-your-data-to-the-hub","sections":[],"depth":2},{"title":"Next steps","local":"next-steps","sections":[],"depth":2}],"depth":1}';function zt(z){return Bt(()=>{new 
URLSearchParams(window.location.search).get("fw")}),[]}class Qt extends xt{constructor(s){super(),vt(this,s,zt,Wt,Xt,{})}}export{Qt as component}; | |
Xet Storage Details
- Size: 13.7 kB
- Xet hash: 5b1b6ebe9c413fa1278ca96a2a0754e1a8acb0393a9ebb733079578bfaadc5d5
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.