Buckets:

hf-doc-build/doc / transformers /main /ro /image_processors.html
HuggingFaceDocBuilder's picture
download
raw
42.3 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Procesatoare de imagini&quot;,&quot;local&quot;:&quot;procesatoare-de-imagini&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Clasele de procesatoare de imagini&quot;,&quot;local&quot;:&quot;clasele-de-procesatoare-de-imagini&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Procesatoare cu backend torchvision&quot;,&quot;local&quot;:&quot;procesatoare-cu-backend-torchvision&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocesare&quot;,&quot;local&quot;:&quot;preprocesare&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Padding&quot;,&quot;local&quot;:&quot;padding&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"/>
<link href="/docs/transformers/main/ro/_app/immutable/entry/start.Cj0_j3du.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/CUCuKvpH.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/Dm_uEeBC.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/entry/app.2x2TdrVN.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/BFzpZI28.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/DM0RASQS.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/DsnmJJEf.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/DJ5hodCp.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/GYau11Sm.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/nodes/0.BonRETMf.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/B1k23FAg.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/utTYfj_T.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/nodes/15.Bjm9GZle.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/BcKuJYrM.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/BGhUAj7r.js" rel="modulepreload">
<link href="/docs/transformers/main/ro/_app/immutable/chunks/BnB0dYtQ.js" rel="modulepreload">
<!--1iac3mg--><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Procesatoare de imagini&quot;,&quot;local&quot;:&quot;procesatoare-de-imagini&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Clasele de procesatoare de imagini&quot;,&quot;local&quot;:&quot;clasele-de-procesatoare-de-imagini&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Procesatoare cu backend torchvision&quot;,&quot;local&quot;:&quot;procesatoare-cu-backend-torchvision&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Preprocesare&quot;,&quot;local&quot;:&quot;preprocesare&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Padding&quot;,&quot;local&quot;:&quot;padding&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2}],&quot;depth&quot;:1}"/><!---->
<link href="/docs/transformers/main/ro/_app/immutable/assets/0.tn0RQdqM.css" rel="modulepreload"> <!--[--><!--[0--><!--[--><!--[0--><!--[--><!--[--><p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 
1" stroke="currentColor"></path></svg><!----></button></div> <!--[-1--><!--]--></div><!----> <!--[0--><h1 class="relative group"><a id="procesatoare-de-imagini" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#procesatoare-de-imagini"><span><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg><!----></span></a> <span>Procesatoare de imagini</span></h1><!--]--><!----> <p>Procesatoarele de imagini convertesc imaginile în valori de pixeli, tensori care reprezintă culorile și dimensiunea imaginilor. Valorile de pixeli sunt input-urile unui model de viziune. Ca să se asigure că un model preantrenat primește input-ul corect, un procesator de imagini poate efectua următoarele operații ca imaginea să fie exact ca imaginile pe care modelul a fost preantrenat.</p> <ul><li>decupare centrată sau redimensionare a imaginii</li> <li>normalizarea sau rescalarea valorilor de pixeli</li></ul> <p>Folosește <code>from_pretrained()</code> ca să încarci configurația unui procesator de imagini (dimensiunea imaginii, dacă să normalizeze și rescaleze etc.) de la un model de viziune de pe Hub-ul Hugging Face sau dintr-un director local. 
Configurația pentru fiecare model preantrenat este salvată într-un fișier <a href="https://huggingface.co/google/vit-base-patch16-224/blob/main/preprocessor_config.json" rel="nofollow">preprocessor_config.json</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>)<!----></pre></div><!----> <p>Pasează o imagine procesatorului de imagini ca să o transformi în valori de pixeli și setează <code>return_tensors="pt"</code> ca să returnezi tensori PyTorch. Poți să printezi input-urile ca să vezi cum arată imaginea ca tensor.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> PIL <span class="hljs-keyword">import</span> Image
<span class="hljs-keyword">import</span> requests
url = <span class="hljs-string">&quot;https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/image_processor_example.png&quot;</span>
image = Image.<span class="hljs-built_in">open</span>(requests.get(url, stream=<span class="hljs-literal">True</span>).raw).convert(<span class="hljs-string">&quot;RGB&quot;</span>)
inputs = image_processor(image, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)<!----></pre></div><!----> <p>Acest ghid acoperă clasa procesatorului de imagini și cum să preprocesezi imagini pentru modelele de viziune.</p> <!--[1--><h2 class="relative group"><a id="clasele-de-procesatoare-de-imagini" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#clasele-de-procesatoare-de-imagini"><span><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg><!----></span></a> <span>Clasele de procesatoare de imagini</span></h2><!--]--><!----> <p>Procesatoarele de imagini folosesc o arhitectură bazată pe două backend-uri:</p> <ul><li><code>TorchvisionBackend</code> — implementarea implicită bazată pe <a href="https://pytorch.org/vision/stable/index.html" rel="nofollow">torchvision</a>. Accelerată GPU și de până la 33x mai rapidă decât backend-ul PIL pentru batch-uri de input-uri <a href="https://pytorch.org/docs/stable/tensors.html" rel="nofollow">torch.Tensor</a>. Toate modelele suportă acest backend; modelele mai noi suportă doar acest backend.</li> <li><code>PilBackend</code> — alternativa PIL/NumPy. Portabilă și doar pe CPU. 
Disponibilă doar pentru modelele mai vechi, unde este utilă ca să reproduci ieșirile numerice exacte ale implementării originale.</li></ul> <p>Backend-ul activ pe un procesator încărcat poate fi inspectat cu atributul <code>backend</code> (de ex., <code>processor.backend == "torchvision"</code>). Fiecare procesator de imagini subclasează <code>ImageProcessingMixin</code> care furnizează metodele <code>from_pretrained()</code> și <code>save_pretrained()</code>.</p> <p>Există două moduri în care poți încărca un procesator de imagini: cu <code>AutoImageProcessor</code> sau direct dintr-o clasă specifică modelului.</p> <div class="flex space-x-2 items-center my-1.5 mr-8 h-7 !pl-0 -mx-3 md:mx-0"><!--[--><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd border-gray-800 bg-black dark:bg-gray-700 text-white">AutoImageProcessor</div><div class="flex items-center border rounded-lg px-1.5 py-1 leading-none select-none text-smd text-gray-500 cursor-pointer opacity-90 hover:text-gray-700 dark:hover:text-gray-200 hover:shadow-sm">model-specific image processor</div><!--]--></div> <div class="language-select"><!--[--><!--[0--><!--[--><p>API-ul <a href="https://huggingface.co/docs/transformers/model_doc/auto" rel="nofollow">AutoClass</a> furnizează o metodă convenabilă de a încărca un procesator de imagini fără să specifici direct modelul cu care procesatorul de imagini este asociat.</p> <p>Folosește <code>from_pretrained()</code> cu argumentul <code>backend</code> ca să selectezi backend-ul. Când <code>backend</code> este omis (implicit), torchvision este ales când este instalat, iar PIL este folosit altfel. Reține că <code>backend="pil"</code> este suportat doar pentru modele mai vechi; modelele mai noi expun doar backend-ul torchvision.</p> <blockquote><p><strong>Notă:</strong> un set mic de modele mai vechi (Chameleon, Flava, Idefics3, SmolVLM) folosesc interpolarea Lanczos pe care torchvision nu o suportă, deci revin mereu la backend-ul PIL indiferent de disponibilitatea torchvision. 
Pasează <code>backend="torchvision"</code> explicit ca să suprascrii asta.</p></blockquote> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
<span class="hljs-comment"># Implicit: alege torchvision dacă este disponibil, altfel pil</span>
image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>)
<span class="hljs-comment"># Solicită explicit backend-ul torchvision</span>
image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>, backend=<span class="hljs-string">&quot;torchvision&quot;</span>)
<span class="hljs-comment"># Solicită explicit backend-ul PIL (doar pentru modele care îl suportă)</span>
image_processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;google/vit-base-patch16-224&quot;</span>, backend=<span class="hljs-string">&quot;pil&quot;</span>)<!----></pre></div><!----><!--]--><!--]--><!----> <!--[-1--><!--]--><!----><!--]--></div><!----> <!--[1--><h2 class="relative group"><a id="procesatoare-cu-backend-torchvision" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#procesatoare-cu-backend-torchvision"><span><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg><!----></span></a> <span>Procesatoare cu backend torchvision</span></h2><!--]--><!----> <p><code>TorchvisionBackend</code> este backend-ul <strong>implicit</strong>. 
Asigură-te că <a href="https://pytorch.org/get-started/locally/#mac-installation" rel="nofollow">torchvision</a> este instalat, apoi încarcă-l cu <code>backend="torchvision"</code> (sau omite pur și simplu <code>backend</code>, deoarece torchvision este selectat automat când este disponibil).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoImageProcessor
processor = AutoImageProcessor.from_pretrained(<span class="hljs-string">&quot;facebook/detr-resnet-50&quot;</span>, backend=<span class="hljs-string">&quot;torchvision&quot;</span>)<!----></pre></div><!----> <p>Controlează pe ce device se face procesarea cu argumentul <code>device</code>. Procesarea se face implicit pe același device ca input-ul dacă input-urile sunt tensori, altfel revine la CPU. Exemplul de mai jos rulează procesarea pe GPU.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> torchvision.io <span class="hljs-keyword">import</span> read_image
<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DetrImageProcessor
images = read_image(<span class="hljs-string">&quot;image.jpg&quot;</span>)
processor = DetrImageProcessor.from_pretrained(<span class="hljs-string">&quot;facebook/detr-resnet-50&quot;</span>)
images_processed = processor(images, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>, device=<span class="hljs-string">&quot;cuda&quot;</span>)<!----></pre></div><!----> <details><summary>Benchmarks</summary> <p>Benchmark-urile sunt obținute de pe o instanță <a href="https://aws.amazon.com/ec2/instance-types/g5/" rel="nofollow">AWS EC2 g5.2xlarge</a> cu un GPU NVIDIA A10G Tensor Core.</p> <div class="flex"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_padded.png"/></div> <div class="flex"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_detr_fast_batched_compiled.png"/></div> <div class="flex"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_single.png"/></div> <div class="flex"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/benchmark_results_full_pipeline_rt_detr_fast_batched.png"/></div></details> <!--[1--><h2 class="relative group"><a id="preprocesare" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocesare"><span><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 
11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg><!----></span></a> <span>Preprocesare</span></h2><!--]--><!----> <p>Modelele de viziune din Transformers se așteaptă la input ca tensori PyTorch de valori de pixeli. Un procesator de imagini gestionează conversia imaginilor în valori de pixeli, reprezentate prin dimensiunea batch-ului, numărul de canale, înălțimea și lățimea. Ca să realizeze asta, o imagine este redimensionată (decupată central) și valorile de pixeli sunt normalizate și rescalate la valorile așteptate de model.</p> <p>Preprocesarea imaginilor nu este același lucru cu <em>augmentarea imaginilor</em>. Augmentarea imaginilor face modificări (luminozitate, culori, rotație etc.) unei imagini cu scopul de a crea exemple de antrenare noi sau de a preveni overfitting-ul. Preprocesarea imaginilor face modificări unei imagini cu scopul de a se potrivi formatului de input așteptat de un model preantrenat.</p> <p>De obicei, imaginile sunt augmentate (ca să crești performanța) și apoi preprocesate înainte de a fi pasate unui model. 
Poți folosi orice bibliotecă (<a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_albumentations.ipynb" rel="nofollow">Albumentations</a>, <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/image_classification_kornia.ipynb" rel="nofollow">Kornia</a>) pentru augmentare și un procesator de imagini pentru preprocesare.</p> <p>Acest ghid folosește modulul <a href="https://pytorch.org/vision/stable/transforms.html" rel="nofollow">transforms</a> din torchvision pentru augmentare.</p> <p>Începe prin a încărca un eșantion mic din dataset-ul <a href="https://hf.co/datasets/food101" rel="nofollow">food101</a>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> datasets <span 
class="hljs-keyword">import</span> load_dataset
dataset = load_dataset(<span class="hljs-string">&quot;ethz/food101&quot;</span>, split=<span class="hljs-string">&quot;train[:100]&quot;</span>)<!----></pre></div><!----> <p>Din modulul <a href="https://pytorch.org/vision/stable/transforms.html" rel="nofollow">transforms</a>, folosește API-ul <a href="https://pytorch.org/vision/master/generated/torchvision.transforms.Compose.html" rel="nofollow">Compose</a> ca să înlănțuiești <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html" rel="nofollow">RandomResizedCrop</a> și <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.ColorJitter.html" rel="nofollow">ColorJitter</a>. Transformările astea decupează și redimensionează aleatoriu o imagine și ajustează aleatoriu culorile imaginii.</p> <p>Dimensiunea imaginii la care să decupezi aleatoriu poate fi obținută de la procesatorul de imagini. Pentru unele modele se așteaptă valori exacte pentru înălțime și lățime, iar pentru altele este necesar doar <code>shortest_edge</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 
opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> RandomResizedCrop, ColorJitter, Compose
size = (
    image_processor.size[<span class="hljs-string">&quot;shortest_edge&quot;</span>]
    <span class="hljs-keyword">if</span> <span class="hljs-string">&quot;shortest_edge&quot;</span> <span class="hljs-keyword">in</span> image_processor.size
    <span class="hljs-keyword">else</span> (image_processor.size[<span class="hljs-string">&quot;height&quot;</span>], image_processor.size[<span class="hljs-string">&quot;width&quot;</span>])
)
_transforms = Compose([RandomResizedCrop(size), ColorJitter(brightness=<span class="hljs-number">0.5</span>, hue=<span class="hljs-number">0.5</span>)])<!----></pre></div><!----> <p>Aplică transformările pe imagini și convertește-le la formatul RGB. Apoi pasează imaginile augmentate procesatorului de imagini ca să returneze valorile de pixeli.</p> <p>Parametrul <code>do_resize</code> este setat la <code>False</code> pentru că imaginile au fost deja redimensionate în pasul de augmentare de <a href="https://pytorch.org/vision/main/generated/torchvision.transforms.RandomResizedCrop.html" rel="nofollow">RandomResizedCrop</a>. Dacă nu augmentezi imaginile, procesatorul de imagini le redimensionează și normalizează automat cu valorile <code>image_mean</code> și <code>image_std</code>. Aceste valori se găsesc în fișierul de configurație al preprocesatorului.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: 
transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
images = [_transforms(img.convert(<span class="hljs-string">&quot;RGB&quot;</span>)) <span class="hljs-keyword">for</span> img <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
examples[<span class="hljs-string">&quot;pixel_values&quot;</span>] = image_processor(images, do_resize=<span class="hljs-literal">False</span>, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)[<span class="hljs-string">&quot;pixel_values&quot;</span>]
<span class="hljs-keyword">return</span> examples<!----></pre></div><!----> <p>Aplică funcția combinată de augmentare și preprocesare întregului dataset din mers cu <code>set_transform</code>.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!---->dataset.set_transform(transforms)<!----></pre></div><!----> <p>Convertește valorile de pixeli înapoi într-o imagine ca să vezi cum a fost augmentată și preprocesată imaginea.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt
img = dataset[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;pixel_values&quot;</span>]
plt.imshow(img.permute(<span class="hljs-number">1</span>, <span class="hljs-number">2</span>, <span class="hljs-number">0</span>))<!----></pre></div><!----> <div class="flex gap-4"><div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/vision-preprocess-tutorial.png" alt="Imaginea originală, înainte de augmentare și preprocesare"/> <figcaption class="mt-2 text-center text-sm text-gray-500">înainte</figcaption></div> <div><img class="rounded-xl" src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/preprocessed_image.png" alt="Aceeași imagine după augmentare și preprocesare"/> <figcaption class="mt-2 text-center text-sm text-gray-500">după</figcaption></div></div> <p>Pentru alte task-uri de viziune, cum ar fi detecția obiectelor sau segmentarea, procesatorul de imagini include metode de post-procesare ca să convertească ieșirile brute ale modelului în predicții cu sens, cum ar fi bounding box-uri sau hărți de segmentare.</p> <!--[2--><h3 class="relative group"><a id="padding" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#padding"><span><svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg><!----></span></a> <span>Padding</span></h3><!--]--><!----> <p>Unele modele, ca <a href="https://huggingface.co/docs/transformers/model_doc/detr">DETR</a>, aplică <a 
href="https://paperswithcode.com/method/image-scale-augmentation" rel="nofollow">augmentarea la scară</a> în antrenare, ceea ce poate face ca imaginile dintr-un batch să aibă dimensiuni diferite. Imaginile cu dimensiuni diferite nu pot fi grupate în batch-uri.</p> <p>Ca să rezolvi asta, faci padding imaginilor cu token-ul special de padding <code>0</code>. Folosește metoda <a href="https://github.com/huggingface/transformers/blob/9578c2597e2d88b6f0b304b5a05864fd613ddcc1/src/transformers/models/detr/image_processing_detr.py#L1151" rel="nofollow">pad</a> ca să faci padding imaginilor și definește o funcție de collatare personalizată ca să le grupezi în batch-uri.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg><!----> <div class=" absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0 "><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent;"></div> Copied</div><!----></button><!----></div> <pre class="language-py "><!----><span class="hljs-keyword">def</span> <span class="hljs-title function_">collate_fn</span>(<span 
class="hljs-params">batch</span>):
pixel_values = [item[<span class="hljs-string">&quot;pixel_values&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
encoding = image_processor.pad(pixel_values, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
labels = [item[<span class="hljs-string">&quot;labels&quot;</span>] <span class="hljs-keyword">for</span> item <span class="hljs-keyword">in</span> batch]
batch = {}
batch[<span class="hljs-string">&quot;pixel_values&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_values&quot;</span>]
batch[<span class="hljs-string">&quot;pixel_mask&quot;</span>] = encoding[<span class="hljs-string">&quot;pixel_mask&quot;</span>]
batch[<span class="hljs-string">&quot;labels&quot;</span>] = labels
<span class="hljs-keyword">return</span> batch<!----></pre></div><!----> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ro/image_processors.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg><!----> <span><span class="underline">Update</span> on GitHub</span></a><!----> <p></p><!--]--><!--]--><!--]--><!--]--><!--]--> <!--[-1--><!--]--><!--]-->
<script>
{
	// Publish the base and asset paths of this docs deployment under the
	// build-generated global name (presumably looked up by the SvelteKit
	// client runtime — confirm against the generated start module).
	__sveltekit_1piag1k = {
		base: "/docs/transformers/main/ro",
		assets: "/docs/transformers/main/ro"
	};

	// Capture the hydration target synchronously: document.currentScript is
	// only set while this script is executing, not inside the promise callback.
	const target = document.currentScript.parentElement;

	// Options handed to the client entry point once both modules are loaded.
	const startOptions = {
		node_ids: [0, 15],
		data: [null,null],
		form: null,
		error: null
	};

	// Start fetching both entry modules in parallel.
	const kitModule = import("/docs/transformers/main/ro/_app/immutable/entry/start.Cj0_j3du.js");
	const appModule = import("/docs/transformers/main/ro/_app/immutable/entry/app.2x2TdrVN.js");

	Promise.all([kitModule, appModule]).then(function (loaded) {
		const kit = loaded[0];
		const app = loaded[1];
		kit.start(app, target, startOptions);
	});
}
</script>

Xet Storage Details

Size:
42.3 kB
·
Xet hash:
5d062ea3c87b2c6993297ded662927767b5e31cf9e858bab621ed723c8556d8a

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.