Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / datasets /main /en /depth_estimation.html

rtrm

3 months ago

download

raw

36.1 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{"title":"Depth estimation","local":"depth-estimation","sections":[],"depth":1}">
	<link href="/docs/datasets/main/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/entry/start.4d44eea4.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/scheduler.bdbef820.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/singletons.36b689ad.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/index.8a885b74.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/paths.27092e28.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/entry/app.d83067e8.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/index.c0aea24a.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/nodes/0.bfb01985.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/nodes/17.0b16a5f3.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/CodeBlock.6ccca92e.js">
	<link rel="modulepreload" href="/docs/datasets/main/en/_app/immutable/chunks/EditOnGithub.725ee0c1.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Depth estimation","local":"depth-estimation","sections":[],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="depth-estimation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#depth-estimation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Depth estimation</span></h1> <p data-svelte-h="svelte-10bebb7">Depth estimation datasets are used to train a model to approximate the relative distance of every pixel in an
	image from the camera, also known as depth. The applications enabled by these datasets primarily lie in areas like visual machine
	perception and perception in robotics. Example applications include mapping streets for self-driving cars. This guide will show you how to apply transformations
	to a depth estimation dataset.</p> <p data-svelte-h="svelte-1e3jamv">Before you start, make sure you have up-to-date versions of <code>albumentations</code> installed:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U albumentations <!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-180f5nb"><a href="https://albumentations.ai/" rel="nofollow">Albumentations</a> is a Python library for performing data augmentation
	for computer vision. It supports various computer vision tasks such as image classification, object
	detection, segmentation, and keypoint estimation.</p> <p data-svelte-h="svelte-rbl7ww">This guide uses the <a href="https://huggingface.co/datasets/sayakpaul/nyu_depth_v2" rel="nofollow">NYU Depth V2</a> dataset which is
	comprised of video sequences from various indoor scenes, recorded by RGB and depth cameras. The dataset consists of scenes from 3 cities and provides images along with
	their depth maps as labels.</p> <p data-svelte-h="svelte-pju1x0">Load the <code>train</code> split of the dataset and take a look at an example:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset

	<span class="hljs-meta">>>> </span>train_dataset = load_dataset(<span class="hljs-string">"sayakpaul/nyu_depth_v2"</span>, split=<span class="hljs-string">"train"</span>)
	<span class="hljs-meta">>>> </span>index = <span class="hljs-number">17</span>
	<span class="hljs-meta">>>> </span>example = train_dataset[index]
	<span class="hljs-meta">>>> </span>example
	{<span class="hljs-string">'image'</span>: <PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x480>,
	<span class="hljs-string">'depth_map'</span>: <PIL.TiffImagePlugin.TiffImageFile image mode=F size=640x480>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wdl8tg">The dataset has two fields:</p> <ul data-svelte-h="svelte-18g7ww2"><li><code>image</code>: a PIL PNG image object with <code>uint8</code> data type.</li> <li><code>depth_map</code>: a PIL Tiff image object with <code>float32</code> data type which is the depth map of the image.</li></ul> <p data-svelte-h="svelte-lgny84">It is mention-worthy that JPEG/PNG format can only store <code>uint8</code> or <code>uint16</code> data. As the depth map is <code>float32</code> data, it can’t be stored using PNG/JPEG. However, we can save the depth map using TIFF format as it supports a wider range of data types, including <code>float32</code> data.</p> <p data-svelte-h="svelte-15dthpv">Next, check out an image with:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example[<span class="hljs-string">"image"</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1f5za0g"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_sample.png"></div> <p data-svelte-h="svelte-1rod9tf">Before we look at the depth map, we need to first convert its data type to <code>uint8</code> using <code>.convert('RGB')</code> as PIL can’t display <code>float32</code> images. Now take a look at its corresponding depth map:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example[<span class="hljs-string">"depth_map"</span>].convert(<span class="hljs-string">"RGB"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-130rqhd"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target.png"></div> <p data-svelte-h="svelte-16bopxj">It’s all black! You’ll need to add some color to the depth map to visualize it properly. To do that, either we can apply color automatically during display using <code>plt.imshow()</code> or create a colored depth map using <code>plt.cm</code> and then display it. In this example, we have used the latter one, as we can save/write the colored depth map later. (the utility below is taken from the <a href="https://github.com/dwofk/fast-depth/blob/master/utils.py" rel="nofollow">FastDepth repository</a>).</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt

	<span class="hljs-meta">>>> </span>cmap = plt.cm.viridis

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">colored_depthmap</span>(<span class="hljs-params">depth, d_min=<span class="hljs-literal">None</span>, d_max=<span class="hljs-literal">None</span></span>):
	<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> d_min <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	<span class="hljs-meta">... </span> d_min = np.<span class="hljs-built_in">min</span>(depth)
	<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> d_max <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	<span class="hljs-meta">... </span> d_max = np.<span class="hljs-built_in">max</span>(depth)
	<span class="hljs-meta">... </span> depth_relative = (depth - d_min) / (d_max - d_min)
	<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> <span class="hljs-number">255</span> * cmap(depth_relative)[:,:,:<span class="hljs-number">3</span>]

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">show_depthmap</span>(<span class="hljs-params">depth_map</span>):
	<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> <span class="hljs-built_in">isinstance</span>(depth_map, np.ndarray):
	<span class="hljs-meta">... </span> depth_map = np.array(depth_map)
	<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> depth_map.ndim == <span class="hljs-number">3</span>:
	<span class="hljs-meta">... </span> depth_map = depth_map.squeeze()

	<span class="hljs-meta">... </span> d_min = np.<span class="hljs-built_in">min</span>(depth_map)
	<span class="hljs-meta">... </span> d_max = np.<span class="hljs-built_in">max</span>(depth_map)
	<span class="hljs-meta">... </span> depth_map = colored_depthmap(depth_map, d_min, d_max)

	<span class="hljs-meta">... </span> plt.imshow(depth_map.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span> plt.axis(<span class="hljs-string">"off"</span>)
	<span class="hljs-meta">... </span> plt.show()

	<span class="hljs-meta">>>> </span>show_depthmap(example[<span class="hljs-string">"depth_map"</span>])<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-16ha3wj"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target_viz.png"></div> <p data-svelte-h="svelte-llua6z">You can also visualize several different images and their corresponding depth maps.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">merge_into_row</span>(<span class="hljs-params">input_image, depth_target</span>):
	<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> <span class="hljs-built_in">isinstance</span>(input_image, np.ndarray):
	<span class="hljs-meta">... </span> input_image = np.array(input_image)
	...
	<span class="hljs-meta">... </span> d_min = np.<span class="hljs-built_in">min</span>(depth_target)
	<span class="hljs-meta">... </span> d_max = np.<span class="hljs-built_in">max</span>(depth_target)
	<span class="hljs-meta">... </span> depth_target_col = colored_depthmap(depth_target, d_min, d_max)
	<span class="hljs-meta">... </span> img_merge = np.hstack([input_image, depth_target_col])
	...
	<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> img_merge

	<span class="hljs-meta">>>> </span>random_indices = np.random.choice(<span class="hljs-built_in">len</span>(train_dataset), <span class="hljs-number">9</span>).tolist()
	<span class="hljs-meta">>>> </span>plt.figure(figsize=(<span class="hljs-number">15</span>, <span class="hljs-number">6</span>))
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> i, idx <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(random_indices):
	<span class="hljs-meta">... </span> example = train_dataset[idx]
	<span class="hljs-meta">... </span> ax = plt.subplot(<span class="hljs-number">3</span>, <span class="hljs-number">3</span>, i + <span class="hljs-number">1</span>)
	<span class="hljs-meta">... </span> image_viz = merge_into_row(
	<span class="hljs-meta">... </span> example[<span class="hljs-string">"image"</span>], example[<span class="hljs-string">"depth_map"</span>]
	<span class="hljs-meta">... </span> )
	<span class="hljs-meta">... </span> plt.imshow(image_viz.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span> plt.axis(<span class="hljs-string">"off"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-16sg8kz"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_collage.png"></div> <p data-svelte-h="svelte-18gbq1z">Now apply some augmentations with <code>albumentations</code>. The augmentation transformations include:</p> <ul data-svelte-h="svelte-1r8fe9e"><li>Random horizontal flipping</li> <li>Random cropping</li> <li>Random brightness and contrast</li> <li>Random gamma correction</li> <li>Random hue saturation</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> albumentations <span class="hljs-keyword">as</span> A

	<span class="hljs-meta">>>> </span>crop_size = (<span class="hljs-number">448</span>, <span class="hljs-number">576</span>)
	<span class="hljs-meta">>>> </span>transforms = [
	<span class="hljs-meta">... </span> A.HorizontalFlip(p=<span class="hljs-number">0.5</span>),
	<span class="hljs-meta">... </span> A.RandomCrop(crop_size[<span class="hljs-number">0</span>], crop_size[<span class="hljs-number">1</span>]),
	<span class="hljs-meta">... </span> A.RandomBrightnessContrast(),
	<span class="hljs-meta">... </span> A.RandomGamma(),
	<span class="hljs-meta">... </span> A.HueSaturationValue()
	<span class="hljs-meta">... </span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1yx9z74">Additionally, define a mapping to better reflect the target key name.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>additional_targets = {<span class="hljs-string">"depth"</span>: <span class="hljs-string">"mask"</span>}
	<span class="hljs-meta">>>> </span>aug = A.Compose(transforms=transforms, additional_targets=additional_targets)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1obm0dg">With <code>additional_targets</code> defined, you can pass the target depth maps to the <code>depth</code> argument of <code>aug</code> instead of <code>mask</code>. You’ll notice this change
	in the <code>apply_transforms()</code> function defined below.</p> <p data-svelte-h="svelte-11hy1qw">Create a function to apply the transformation to the images as well as their depth maps:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">apply_transforms</span>(<span class="hljs-params">examples</span>):
	<span class="hljs-meta">... </span> transformed_images, transformed_maps = [], []
	<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> image, depth_map <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(examples[<span class="hljs-string">"image"</span>], examples[<span class="hljs-string">"depth_map"</span>]):
	<span class="hljs-meta">... </span> image, depth_map = np.array(image), np.array(depth_map)
	<span class="hljs-meta">... </span> transformed = aug(image=image, depth=depth_map)
	<span class="hljs-meta">... </span> transformed_images.append(transformed[<span class="hljs-string">"image"</span>])
	<span class="hljs-meta">... </span> transformed_maps.append(transformed[<span class="hljs-string">"depth"</span>])
	...
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"pixel_values"</span>] = transformed_images
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"labels"</span>] = transformed_maps
	<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x4vpj6">Use the <a href="/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a> function to apply the transformation on-the-fly to batches of the dataset to consume less disk space:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>train_dataset.set_transform(apply_transforms)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k3k6ue">You can verify the transformation worked by indexing into the <code>pixel_values</code> and <code>labels</code> of an example image:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example = train_dataset[index]

	<span class="hljs-meta">>>> </span>plt.imshow(example[<span class="hljs-string">"pixel_values"</span>])
	<span class="hljs-meta">>>> </span>plt.axis(<span class="hljs-string">"off"</span>)
	<span class="hljs-meta">>>> </span>plt.show()<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1vylqwk"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_sample_aug.png"></div> <p data-svelte-h="svelte-1yo7m5l">Visualize the same transformation on the image’s corresponding depth map:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>show_depthmap(example[<span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1wigtnh"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target_aug.png"></div> <p data-svelte-h="svelte-wsimbk">You can also visualize multiple training samples reusing the previous <code>random_indices</code>:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>plt.figure(figsize=(<span class="hljs-number">15</span>, <span class="hljs-number">6</span>))

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> i, idx <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(random_indices):
	<span class="hljs-meta">... </span> ax = plt.subplot(<span class="hljs-number">3</span>, <span class="hljs-number">3</span>, i + <span class="hljs-number">1</span>)
	<span class="hljs-meta">... </span> example = train_dataset[idx]
	<span class="hljs-meta">... </span> image_viz = merge_into_row(
	<span class="hljs-meta">... </span> example[<span class="hljs-string">"pixel_values"</span>], example[<span class="hljs-string">"labels"</span>]
	<span class="hljs-meta">... </span> )
	<span class="hljs-meta">... </span> plt.imshow(image_viz.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span> plt.axis(<span class="hljs-string">"off"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-9icv47"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_aug_collage.png"></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/datasets/blob/main/docs/source/depth_estimation.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1"><</span> <span data-svelte-h="svelte-x0xyl0">></span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_w3org2 = {
	assets: "/docs/datasets/main/en",
	base: "/docs/datasets/main/en",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/datasets/main/en/_app/immutable/entry/start.4d44eea4.js"),
	import("/docs/datasets/main/en/_app/immutable/entry/app.d83067e8.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 17],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 36.1 kB
Xet hash:: 400b4dc63df9350c73a1743f9116442208999aaa0a55f9b0f6dfdd914d2e52ba

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.