Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / datasets /pr_8113 /en /depth_estimation.html

HuggingFaceDocBuilder

2 months ago

download

raw

40.2 kB

	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Depth estimation&quot;,&quot;local&quot;:&quot;depth-estimation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:1}">
	<link href="/docs/datasets/pr_8113/en/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/entry/start.969da75e.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/scheduler.d75c11ed.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/singletons.61bfd4fd.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/index.d12496d4.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/paths.4c60f2bc.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/entry/app.687c6ad1.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/preload-helper.a19054d5.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/index.4ec9dfe9.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/nodes/0.4378a702.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/nodes/16.bafaa4e8.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.903f9bf7.js">
	<link rel="modulepreload" href="/docs/datasets/pr_8113/en/_app/immutable/chunks/CodeBlock.77fa95e2.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"Depth estimation","local":"depth-estimation","sections":[],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible 
sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <h1 class="relative group"><a id="depth-estimation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#depth-estimation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Depth estimation</span></h1> <p data-svelte-h="svelte-10bebb7">Depth estimation datasets are used to train a model to approximate the relative distance of every pixel in an
	image from the camera, also known as depth. The applications enabled by these datasets primarily lie in areas like visual machine
	perception and perception in robotics. Example applications include mapping streets for self-driving cars. This guide will show you how to apply transformations
	to a depth estimation dataset.</p> <p data-svelte-h="svelte-1e3jamv">Before you start, make sure you have up-to-date versions of <code>albumentations</code> installed:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -U albumentations <!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-180f5nb"><a href="https://albumentations.ai/" rel="nofollow">Albumentations</a> is a Python library for performing data augmentation
	for computer vision. It supports various computer vision tasks such as image classification, object
	detection, segmentation, and keypoint estimation.</p> <p data-svelte-h="svelte-rbl7ww">This guide uses the <a href="https://huggingface.co/datasets/sayakpaul/nyu_depth_v2" rel="nofollow">NYU Depth V2</a> dataset which is
	composed of video sequences from various indoor scenes, recorded by RGB and depth cameras. The dataset consists of scenes from 3 cities and provides images along with
	their depth maps as labels.</p> <p data-svelte-h="svelte-pju1x0">Load the <code>train</code> split of the dataset and take a look at an example:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset

	<span class="hljs-meta">>>> </span>train_dataset = load_dataset(<span class="hljs-string">"sayakpaul/nyu_depth_v2"</span>, split=<span class="hljs-string">"train"</span>)
	<span class="hljs-meta">>>> </span>index = <span class="hljs-number">17</span>
	<span class="hljs-meta">>>> </span>example = train_dataset[index]
	<span class="hljs-meta">>>> </span>example
	{<span class="hljs-string">'image'</span>: &lt;PIL.PngImagePlugin.PngImageFile image mode=RGB size=640x480&gt;,
	<span class="hljs-string">'depth_map'</span>: <PIL.TiffImagePlugin.TiffImageFile image mode=F size=640x480>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wdl8tg">The dataset has two fields:</p> <ul data-svelte-h="svelte-18g7ww2"><li><code>image</code>: a PIL PNG image object with <code>uint8</code> data type.</li> <li><code>depth_map</code>: a PIL Tiff image object with <code>float32</code> data type which is the depth map of the image.</li></ul> <p data-svelte-h="svelte-1806ij4">Here the depth maps are using TIFF format as it supports a wide range of data types, including <code>float32</code> data.
	However, it is worth noting that the JPEG/PNG formats can only store <code>uint8</code> or <code>uint16</code> data.
	Therefore if you have depth maps saved as JPEG/PNG, use the <code>Image(mode="F")</code> type to load them as single channel <code>float32</code> like normal depth maps:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> Image

	<span class="hljs-meta">>>> </span>train_dataset = train_dataset.cast_column(<span class="hljs-string">"depth_map"</span>, Image(mode=<span class="hljs-string">"F"</span>))<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15dthpv">Next, check out an image with:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example[<span class="hljs-string">"image"</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1f5za0g"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_sample.png"></div> <p data-svelte-h="svelte-1rod9tf">Before we look at the depth map, we need to first convert its data type to <code>uint8</code> using <code>.convert('RGB')</code> as PIL can’t display <code>float32</code> 
images. Now take a look at its corresponding depth map:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example[<span class="hljs-string">"depth_map"</span>].convert(<span class="hljs-string">"RGB"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-130rqhd"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target.png"></div> <p data-svelte-h="svelte-16bopxj">It’s all black! You’ll need to add some color to the depth map to visualize it properly. To do that, either we can apply color automatically during display using <code>plt.imshow()</code> or create a colored depth map using <code>plt.cm</code> and then display it. 
In this example, we have used the latter one, as we can save/write the colored depth map later. (the utility below is taken from the <a href="https://github.com/dwofk/fast-depth/blob/master/utils.py" rel="nofollow">FastDepth repository</a>).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> matplotlib.pyplot <span class="hljs-keyword">as</span> plt

	<span class="hljs-meta">>>> </span>cmap = plt.cm.viridis

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">colored_depthmap</span>(<span class="hljs-params">depth, d_min=<span class="hljs-literal">None</span>, d_max=<span class="hljs-literal">None</span></span>):
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">if</span> d_min <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	<span class="hljs-meta">... </span>        d_min = np.<span class="hljs-built_in">min</span>(depth)
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">if</span> d_max <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
	<span class="hljs-meta">... </span>        d_max = np.<span class="hljs-built_in">max</span>(depth)
	<span class="hljs-meta">... </span>    depth_relative = (depth - d_min) / (d_max - d_min)
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">return</span> <span class="hljs-number">255</span> * cmap(depth_relative)[:,:,:<span class="hljs-number">3</span>]

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">show_depthmap</span>(<span class="hljs-params">depth_map</span>):
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> <span class="hljs-built_in">isinstance</span>(depth_map, np.ndarray):
	<span class="hljs-meta">... </span>        depth_map = np.array(depth_map)
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">if</span> depth_map.ndim == <span class="hljs-number">3</span>:
	<span class="hljs-meta">... </span>        depth_map = depth_map.squeeze()

	<span class="hljs-meta">... </span>    d_min = np.<span class="hljs-built_in">min</span>(depth_map)
	<span class="hljs-meta">... </span>    d_max = np.<span class="hljs-built_in">max</span>(depth_map)
	<span class="hljs-meta">... </span>    depth_map = colored_depthmap(depth_map, d_min, d_max)

	<span class="hljs-meta">... </span>    plt.imshow(depth_map.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span>    plt.axis(<span class="hljs-string">"off"</span>)
	<span class="hljs-meta">... </span>    plt.show()

	<span class="hljs-meta">>>> </span>show_depthmap(example[<span class="hljs-string">"depth_map"</span>])<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-16ha3wj"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target_viz.png"></div> <p data-svelte-h="svelte-llua6z">You can also visualize several different images and their corresponding depth maps.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">merge_into_row</span>(<span class="hljs-params">input_image, depth_target</span>):
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> <span class="hljs-built_in">isinstance</span>(input_image, np.ndarray):
	<span class="hljs-meta">... </span>        input_image = np.array(input_image)
	...
	<span class="hljs-meta">... </span>    d_min = np.<span class="hljs-built_in">min</span>(depth_target)
	<span class="hljs-meta">... </span>    d_max = np.<span class="hljs-built_in">max</span>(depth_target)
	<span class="hljs-meta">... </span>    depth_target_col = colored_depthmap(depth_target, d_min, d_max)
	<span class="hljs-meta">... </span>    img_merge = np.hstack([input_image, depth_target_col])
	...
	<span class="hljs-meta">... </span>    <span class="hljs-keyword">return</span> img_merge

	<span class="hljs-meta">>>> </span>random_indices = np.random.choice(<span class="hljs-built_in">len</span>(train_dataset), <span class="hljs-number">9</span>).tolist()
	<span class="hljs-meta">>>> </span>plt.figure(figsize=(<span class="hljs-number">15</span>, <span class="hljs-number">6</span>))
	<span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> i, idx <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(random_indices):
	<span class="hljs-meta">... </span> example = train_dataset[idx]
	<span class="hljs-meta">... </span> ax = plt.subplot(<span class="hljs-number">3</span>, <span class="hljs-number">3</span>, i + <span class="hljs-number">1</span>)
	<span class="hljs-meta">... </span> image_viz = merge_into_row(
	<span class="hljs-meta">... </span> example[<span class="hljs-string">"image"</span>], example[<span class="hljs-string">"depth_map"</span>]
	<span class="hljs-meta">... </span> )
	<span class="hljs-meta">... </span> plt.imshow(image_viz.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span> plt.axis(<span class="hljs-string">"off"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-16sg8kz"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_collage.png"></div> <p data-svelte-h="svelte-18gbq1z">Now apply some augmentations with <code>albumentations</code>. The augmentation transformations include:</p> <ul data-svelte-h="svelte-1r8fe9e"><li>Random horizontal flipping</li> <li>Random cropping</li> <li>Random brightness and contrast</li> <li>Random gamma correction</li> <li>Random hue saturation</li></ul> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> albumentations <span class="hljs-keyword">as</span> A

	<span class="hljs-meta">>>> </span>crop_size = (<span class="hljs-number">448</span>, <span class="hljs-number">576</span>)
	<span class="hljs-meta">>>> </span>transforms = [
	<span class="hljs-meta">... </span> A.HorizontalFlip(p=<span class="hljs-number">0.5</span>),
	<span class="hljs-meta">... </span> A.RandomCrop(crop_size[<span class="hljs-number">0</span>], crop_size[<span class="hljs-number">1</span>]),
	<span class="hljs-meta">... </span> A.RandomBrightnessContrast(),
	<span class="hljs-meta">... </span> A.RandomGamma(),
	<span class="hljs-meta">... </span> A.HueSaturationValue()
	<span class="hljs-meta">... </span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1yx9z74">Additionally, define a mapping to better reflect the target key name.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>additional_targets = {<span class="hljs-string">"depth"</span>: <span class="hljs-string">"mask"</span>}
	<span class="hljs-meta">>>> </span>aug = A.Compose(transforms=transforms, additional_targets=additional_targets)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1obm0dg">With <code>additional_targets</code> defined, you can pass the target depth maps to the <code>depth</code> argument of <code>aug</code> instead of <code>mask</code>. You’ll notice this change
	in the <code>apply_transforms()</code> function defined below.</p> <p data-svelte-h="svelte-11hy1qw">Create a function to apply the transformation to the images as well as their depth maps:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">apply_transforms</span>(<span class="hljs-params">examples</span>):
	<span class="hljs-meta">... </span> transformed_images, transformed_maps = [], []
	<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> image, depth_map <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(examples[<span class="hljs-string">"image"</span>], examples[<span class="hljs-string">"depth_map"</span>]):
	<span class="hljs-meta">... </span> image, depth_map = np.array(image), np.array(depth_map)
	<span class="hljs-meta">... </span> transformed = aug(image=image, depth=depth_map)
	<span class="hljs-meta">... </span> transformed_images.append(transformed[<span class="hljs-string">"image"</span>])
	<span class="hljs-meta">... </span> transformed_maps.append(transformed[<span class="hljs-string">"depth"</span>])
	...
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"pixel_values"</span>] = transformed_images
	<span class="hljs-meta">... </span> examples[<span class="hljs-string">"labels"</span>] = transformed_maps
	<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12nty9f">Use the <a href="/docs/datasets/pr_8113/en/package_reference/main_classes#datasets.Dataset.set_transform">set_transform()</a> function to apply the transformation on-the-fly to batches of the dataset to consume less disk space:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>train_dataset.set_transform(apply_transforms)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k3k6ue">You can verify the transformation worked by indexing into the <code>pixel_values</code> and <code>labels</code> of an example image:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative 
text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example = train_dataset[index]

	<span class="hljs-meta">>>> </span>plt.imshow(example[<span class="hljs-string">"pixel_values"</span>])
	<span class="hljs-meta">>>> </span>plt.axis(<span class="hljs-string">"off"</span>)
	<span class="hljs-meta">>>> </span>plt.show()<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1vylqwk"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_sample_aug.png" alt="Example training image after the augmentation transforms have been applied"></div> <p data-svelte-h="svelte-1yo7m5l">Visualize the same transformation on the image’s corresponding depth map:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>show_depthmap(example[<span class="hljs-string">"labels"</span>])<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-1wigtnh"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_target_aug.png" alt="Depth map of the same example after the matching transformation"></div> <p data-svelte-h="svelte-wsimbk">You can also visualize multiple training 
samples reusing the previous <code>random_indices</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>plt.figure(figsize=(<span class="hljs-number">15</span>, <span class="hljs-number">6</span>))

	<span class="hljs-meta">>>> </span><span class="hljs-keyword">for</span> i, idx <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(random_indices):
	<span class="hljs-meta">... </span> ax = plt.subplot(<span class="hljs-number">3</span>, <span class="hljs-number">3</span>, i + <span class="hljs-number">1</span>)
	<span class="hljs-meta">... </span> example = train_dataset[idx]
	<span class="hljs-meta">... </span> image_viz = merge_into_row(
	<span class="hljs-meta">... </span> example[<span class="hljs-string">"pixel_values"</span>], example[<span class="hljs-string">"labels"</span>]
	<span class="hljs-meta">... </span> )
	<span class="hljs-meta">... </span> plt.imshow(image_viz.astype(<span class="hljs-string">"uint8"</span>))
	<span class="hljs-meta">... </span> plt.axis(<span class="hljs-string">"off"</span>)<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-9icv47"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/depth_est_aug_collage.png" alt="Grid of augmented training images, each shown side by side with its depth map"></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/datasets/blob/main/docs/source/depth_estimation.mdx" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p>

	<script>
	{
	__sveltekit_1j7f9j2 = {
	assets: "/docs/datasets/pr_8113/en",
	base: "/docs/datasets/pr_8113/en",
	env: {}
	};

	const element = document.currentScript.parentElement;

	const data = [null,null];

	Promise.all([
	import("/docs/datasets/pr_8113/en/_app/immutable/entry/start.969da75e.js"),
	import("/docs/datasets/pr_8113/en/_app/immutable/entry/app.687c6ad1.js")
	]).then(([kit, app]) => {
	kit.start(app, element, {
	node_ids: [0, 16],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size: 40.2 kB
Xet hash: 9301b3ea1d41d19ebd7a29b284838b8dbee388a6e0c42b53c7a76dd4605e1288

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.