Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / datasets /pr_8021 /en /semantic_segmentation.html

rtrm

3 months ago

download

raw

30.8 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Semantic segmentation&quot;,&quot;local&quot;:&quot;semantic-segmentation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:1}">
	<link href="/docs/datasets/pr_8021/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/entry/start.467c4c66.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/scheduler.d75c11ed.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/singletons.24e4ec1f.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/index.d12496d4.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/paths.409c1290.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/entry/app.3b2ba720.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/preload-helper.a99c0584.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/index.4ec9dfe9.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/nodes/0.5fda7065.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/nodes/42.0bcb8a09.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.ee0f129e.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8021/en/_app/immutable/chunks/CodeBlock.5919a092.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Semantic segmentation","local":"semantic-segmentation","sections":[],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 
overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="semantic-segmentation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#semantic-segmentation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Semantic segmentation</span></h1> <p data-svelte-h="svelte-fw4ret">Semantic segmentation datasets are used to train a model to classify every pixel in an image. There are
	a wide variety of applications enabled by these datasets such as background removal from images, stylizing
	images, or scene understanding for autonomous driving. This guide will show you how to apply transformations
	to an image segmentation dataset.</p> <p data-svelte-h="svelte-1qtuikq">Before you start, make sure you have up-to-date versions of <code>albumentations</code> and <code>cv2</code> installed:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U albumentations opencv-python<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-180f5nb"><a href="https://albumentations.ai/" rel="nofollow">Albumentations</a> is a Python library for performing data augmentation
	for computer vision. It supports various computer vision tasks such as image classification, object
	detection, segmentation, and keypoint estimation.</p> <p data-svelte-h="svelte-1jjf6cy">This guide uses the <a href="https://huggingface.co/datasets/scene_parse_150" rel="nofollow">Scene Parsing</a> dataset for segmenting
	and parsing an image into different image regions associated with semantic categories, such as sky, road, person, and bed.</p> <p data-svelte-h="svelte-pju1x0">Load the <code>train</code> split of the dataset and take a look at an example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset

	<span class="hljs-meta">>>> </span>dataset = load_dataset(<span class="hljs-string">"scene_parse_150"</span>, split=<span class="hljs-string">"train"</span>)
	<span class="hljs-meta">>>> </span>index = <span class="hljs-number">10</span>
	<span class="hljs-meta">>>> </span>dataset[index]
	{<span class="hljs-string">'image'</span>: &lt;PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=683x512 at <span class="hljs-number">0x7FB37B0EC810</span>&gt;,
	<span class="hljs-string">'annotation'</span>: &lt;PIL.PngImagePlugin.PngImageFile image mode=L size=683x512 at <span class="hljs-number">0x7FB37B0EC9D0</span>&gt;,
	<span class="hljs-string">'scene_category'</span>: <span class="hljs-number">927</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-jjwn46">The dataset has three fields:</p> <ul data-svelte-h="svelte-1r68wni"><li><code>image</code>: a PIL image object.</li> <li><code>annotation</code>: segmentation mask of the image.</li> <li><code>scene_category</code>: the label or scene category of the image (like “kitchen” or “office”).</li></ul> <p data-svelte-h="svelte-15dthpv">Next, check out an image with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[index][<span class="hljs-string">"image"</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1337354"><img 
src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/image_seg.png"></div> <p data-svelte-h="svelte-qlholw">Similarly, you can check out the respective segmentation mask:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset[index][<span class="hljs-string">"annotation"</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-ru2vd9"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/seg_mask.png"></div> <p data-svelte-h="svelte-vzaie7">We can also add a <a href="https://github.com/tensorflow/models/blob/3f1ca33afe3c1631b733ea7e40c294273b9e406d/research/deeplab/utils/get_dataset_colormap.py#L51" rel="nofollow">color palette</a> on the
	segmentation mask and overlay it on top of the original image to visualize the dataset:</p> <p data-svelte-h="svelte-vvbf2j">After defining the color palette, you should be ready to visualize some overlays.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">visualize_seg_mask</span>(<span class="hljs-params">image: np.ndarray, mask: np.ndarray</span>):
	<span class="hljs-meta">... </span>    color_seg = np.zeros((mask.shape[<span class="hljs-number">0</span>], mask.shape[<span class="hljs-number">1</span>], <span class="hljs-number">3</span>), dtype=np.uint8)
	<span class="hljs-meta">... </span>    palette = np.array(create_ade20k_label_colormap())
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">for</span> label, color <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(palette):
	<span class="hljs-meta">... </span>        color_seg[mask == label, :] = color
	<span class="hljs-meta">... </span>    color_seg = color_seg[..., ::-<span class="hljs-number">1</span>] <span class="hljs-comment"># convert to BGR</span>

	<span class="hljs-meta">... </span>    img = np.array(image) * <span class="hljs-number">0.5</span> + color_seg * <span class="hljs-number">0.5</span> <span class="hljs-comment"># plot the image with the segmentation map</span>
	<span class="hljs-meta">... </span>    img = img.astype(np.uint8)

	<span class="hljs-meta">... </span>    plt.figure(figsize=(<span class="hljs-number">15</span>, <span class="hljs-number">10</span>))
	<span class="hljs-meta">... </span>    plt.imshow(img)
	<span class="hljs-meta">... </span>    plt.axis(<span class="hljs-string">"off"</span>)
	<span class="hljs-meta">... </span>    plt.show()


	<span class="hljs-meta">>>> </span>visualize_seg_mask(
	<span class="hljs-meta">... </span>    np.array(dataset[index][<span class="hljs-string">"image"</span>]),
	<span class="hljs-meta">... </span>    np.array(dataset[index][<span class="hljs-string">"annotation"</span>])
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1unth95"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/seg_overlay.png"></div> <p data-svelte-h="svelte-c3rxqx">Now apply some augmentations with <code>albumentations</code>. You’ll first resize the image and adjust its brightness.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> albumentations

	<span class="hljs-meta">>>> </span>transform = albumentations.Compose(
	<span class="hljs-meta">... </span>    [
	<span class="hljs-meta">... </span>        albumentations.Resize(<span class="hljs-number">256</span>, <span class="hljs-number">256</span>),
	<span class="hljs-meta">... </span>        albumentations.RandomBrightnessContrast(brightness_limit=<span class="hljs-number">0.3</span>, contrast_limit=<span class="hljs-number">0.3</span>, p=<span class="hljs-number">0.5</span>),
	<span class="hljs-meta">... </span>    ]
	<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-50733g">Create a function to apply the transformation to the images:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">transforms</span>(<span class="hljs-params">examples</span>):
	<span class="hljs-meta">... </span> transformed_images, transformed_masks = [], []
	...
	<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> image, seg_mask <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(examples[<span class="hljs-string">"image"</span>], examples[<span class="hljs-string">"annotation"</span>]):
	<span class="hljs-meta">... </span>     image, seg_mask = np.array(image), np.array(seg_mask)
	<span class="hljs-meta">... </span>     transformed = transform(image=image, mask=seg_mask)
	<span class="hljs-meta">... </span>     transformed_images.append(transformed[<span class="hljs-string">"image"</span>])
	<span class="hljs-meta">... </span>     transformed_masks.append(transformed[<span class="hljs-string">"mask"</span>])
	...
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"pixel_values"</span>] = transformed_images
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"label"</span>] = transformed_masks
	<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pgazmt">Use the <a href="/docs/datasets/pr_8021/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a> function to apply the transformation on-the-fly to batches of the dataset to consume less disk space:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>dataset.set_transform(transforms)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-11iow8s">You can verify the transformation worked by indexing into the <code>pixel_values</code> and <code>label</code> of an example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm 
focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>image = np.array(dataset[index][<span class="hljs-string">"pixel_values"</span>])
	<span class="hljs-meta">>>> </span>mask = np.array(dataset[index][<span class="hljs-string">"label"</span>])

	<span class="hljs-meta">>>> </span>visualize_seg_mask(image, mask)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-rz0i83"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/albumentations_seg.png"></div> <p data-svelte-h="svelte-o9kxh2">In this guide, you have used <code>albumentations</code> for augmenting the dataset. It’s also possible to use <code>torchvision</code> to apply some similar transforms.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> torchvision.transforms <span class="hljs-keyword">import</span> Resize, ColorJitter, Compose

	<span class="hljs-meta">>>> </span>transformation_chain = Compose([
	<span class="hljs-meta">... </span>    Resize((<span class="hljs-number">256</span>, <span class="hljs-number">256</span>)),
	<span class="hljs-meta">... </span>    ColorJitter(brightness=<span class="hljs-number">0.25</span>, contrast=<span class="hljs-number">0.25</span>, saturation=<span class="hljs-number">0.25</span>, hue=<span class="hljs-number">0.1</span>)
	<span class="hljs-meta">... </span>])
	<span class="hljs-meta">>>> </span>resize = Resize((<span class="hljs-number">256</span>, <span class="hljs-number">256</span>))

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">train_transforms</span>(<span class="hljs-params">example_batch</span>):
	<span class="hljs-meta">... </span>    example_batch[<span class="hljs-string">"pixel_values"</span>] = [transformation_chain(x) <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> example_batch[<span class="hljs-string">"image"</span>]]
	<span class="hljs-meta">... </span>    example_batch[<span class="hljs-string">"label"</span>] = [resize(x) <span class="hljs-keyword">for</span> x <span class="hljs-keyword">in</span> example_batch[<span class="hljs-string">"annotation"</span>]]
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">return</span> example_batch

	<span class="hljs-meta">>>> </span>dataset.set_transform(train_transforms)

	<span class="hljs-meta">>>> </span>image = np.array(dataset[index][<span class="hljs-string">"pixel_values"</span>])
	<span class="hljs-meta">>>> </span>mask = np.array(dataset[index][<span class="hljs-string">"label"</span>])

	<span class="hljs-meta">>>> </span>visualize_seg_mask(image, mask)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-jb5bqb"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/torchvision_seg.png"></div> <blockquote class="tip" data-svelte-h="svelte-57s756"><p>Now that you know how to process a dataset for semantic segmentation, learn
	<a href="https://huggingface.co/docs/transformers/tasks/semantic_segmentation" rel="nofollow">how to train a semantic segmentation model</a>
	and use it for inference.</p></blockquote> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/datasets/blob/main/docs/source/semantic_segmentation.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	// Client bootstrap for this page: publish the path configuration on a global,
	// then load the two entry modules and start the app on the surrounding element.

	// Assigned without a declaration keyword, so this becomes a global.
	// Holds the assets/base URL prefixes and an empty env object.
	__sveltekit_1tcoqe3 = {
	assets: "/docs/datasets/pr_8021/en",
	base: "/docs/datasets/pr_8021/en",
	env: {}
	};

	// Parent element of this script tag; passed to kit.start() below.
	// Read synchronously here: document.currentScript is only set while the
	// script is executing, not later inside the promise callback.
	const element = document.currentScript.parentElement;

	// NOTE(review): two null entries, presumably one per id in node_ids below
	// (i.e. no preloaded data for either) — confirm.
	const data = [null,null];

	// Load the start and app entry modules in parallel; kit.start() runs once both resolve.
	Promise.all([
	import("/docs/datasets/pr_8021/en/_app/immutable/entry/start.467c4c66.js"),
	import("/docs/datasets/pr_8021/en/_app/immutable/entry/app.3b2ba720.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	// NOTE(review): presumably refers to the nodes/0 and nodes/42 chunks that the
	// page preloads — confirm.
	node_ids: [0, 42],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 30.8 kB
Xet hash: 592328423e1bc8a49eba248e2d5944b0371bcbd26b44c13671309e458e038b6c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.