Buckets:

hf-doc-build
/

doc-dev

Files

xet

hf-doc-build/doc-dev / course /pr_1021 /zh-CN /chapter9 /3.html

rtrm

about 2 months ago

download

raw

28.7 kB

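	<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;了解 Interface 类&quot;,&quot;local&quot;:&quot;了解 Interface 类&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;如何创建 Interface&quot;,&quot;local&quot;:&quot;如何创建 Interface&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;一个音频组件的简单示例&quot;,&quot;local&quot;:&quot;一个音频组件的简单示例&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;处理多个输入和输出&quot;,&quot;local&quot;:&quot;处理多个输入和输出&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;launch() 方法&quot;,&quot;local&quot;:&quot;launch-方法&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;✏️ 让我们实践一下！&quot;,&quot;local&quot;:&quot;-让我们实践一下&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">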
	<link href="/docs/course/pr_1021/zh-CN/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/entry/start.f3a1a511.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/scheduler.37c15a92.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/singletons.9bf55235.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/index.18351ede.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/paths.0ba10750.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/entry/app.c39e37cf.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/index.2bf4358c.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/nodes/0.dad18ce3.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/each.e59479a4.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/nodes/72.6742a769.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/CodeBlock.4e987730.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/DocNotebookDropdown.efc1fb7c.js">
	<link rel="modulepreload" href="/docs/course/pr_1021/zh-CN/_app/immutable/chunks/getInferenceSnippets.ebf8be91.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;了解 Interface 类&quot;,&quot;local&quot;:&quot;了解 Interface 类&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;如何创建 Interface&quot;,&quot;local&quot;:&quot;如何创建 Interface&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;一个音频组件的简单示例&quot;,&quot;local&quot;:&quot;一个音频组件的简单示例&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;处理多个输入和输出&quot;,&quot;local&quot;:&quot;处理多个输入和输出&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;launch() 方法&quot;,&quot;local&quot;:&quot;launch-方法&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;✏️ 让我们实践一下！&quot;,&quot;local&quot;:&quot;-让我们实践一下&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="了解 Interface 类" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#了解 Interface 类"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>了解 Interface 类</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter9/section3.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a 
href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter9/section3.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <p data-svelte-h="svelte-laf13r">在这一节中，我们将更详细地了解 <code>Interface</code> 类，并理解创建 Interface 时使用的主要参数的含义和设置方法。</p> <h2 class="relative group"><a id="如何创建 Interface" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#如何创建 Interface"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>如何创建 Interface</span></h2> <p data-svelte-h="svelte-o1yr13">你会注意到 <code>Interface</code> 类有 3 个必需参数： <code>Interface(fn, inputs, outputs, ...)</code></p> <p data-svelte-h="svelte-gqxxez">这些参数的含义是：</p> <ul data-svelte-h="svelte-1k2if01"><li><code>fn</code> ：由 Gradio 接口包装的预测函数。该函数可以接受一个或多个参数并返回一个或多个值</li> <li><code>inputs</code> ：输入组件类型。Gradio 提供了许多预构建的组件，例如 <code>image</code> 或 <code>mic</code> 。</li> <li><code>outputs</code> ：输出组件类型。同样，Gradio 提供了许多预构建的组件，例如 <code>image</code> 或 <code>label</code> 。</li></ul> <p data-svelte-h="svelte-cf195f">可以使用组件的完整列表请参阅 <a href="https://gradio.app/docs" rel="nofollow">Gradio 文档</a> 
。每个预构建的组件都可以通过实例化该组件对应的类来定制。</p> <p data-svelte-h="svelte-17o6fl6">例如，正如我们在 <a href="/course/chapter9/2">前一小节</a> 中看到的，你可以将一个 <code>Textbox(lines=7, label="Prompt")</code> 组件传递给 <code>inputs</code> 参数，而不是将 <code>"textbox"</code> 以字符串形式传递进去，这样就可以创建一个 7 行并包含一个标签的文本框。</p> <p data-svelte-h="svelte-1i07wje">让我们看另一个例子，这个例子使用了 <code>Audio</code> 组件。</p> <h2 class="relative group"><a id="一个音频组件的简单示例" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#一个音频组件的简单示例"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>一个音频组件的简单示例</span></h2> <p data-svelte-h="svelte-14ytwgm">如前所述，Gradio 提供了许多不同的输入和输出组件。因此，让我们构建一个适用于音频的 <code>Interface</code> 。</p> <p data-svelte-h="svelte-joy6oa">在这个例子中，我们将构建一个输入和输出都是音频的函数，它可以接收一个音频文件后将其反转并返回。</p> <p data-svelte-h="svelte-6lotiv">我们将使用 <code>Audio</code> 组件作为输入。使用 <code>Audio</code> 组件时，你可以通过 <code>source</code> 指定输入音频的方式是上传的音频文件还是通过麦克风实时录制的声音。在这个例子中，让我们将其设置为“麦克风”。为了让交互更加友好，我们会在我们的 <code>Audio</code> 上添加一个标签，上面写着“Speak here.”。</p> <p data-svelte-h="svelte-1fbke88">此外，我们希望函数接收的音频是 <code>numpy</code> 数组格式，这样我们可以轻松地“反转”它。因此我们将 <code>"type"</code> 设置为 <code>"numpy"</code> ，它会将传递输入 data 转换为 
<code>(sample_rate,data)</code> 的元组输入到我们的函数。</p> <p data-svelte-h="svelte-19r0yoy">我们还将使用 <code>Audio</code> 作为输出组件，它可以自动将根据采样率和音频数据将 numpy 数组渲染为可播放的音频文件。因此，在这个例子中不需要对输出组件进行修改，只需要传递一个 <code>"audio"</code> 字符串。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr


	<span class="hljs-keyword">def</span> <span class="hljs-title function_">reverse_audio</span>(<span class="hljs-params">audio</span>):
	sr, data = audio
	reversed_audio = (sr, np.flipud(data))
	<span class="hljs-keyword">return</span> reversed_audio


	mic = gr.Audio(source=<span class="hljs-string">"microphone"</span>, <span class="hljs-built_in">type</span>=<span class="hljs-string">"numpy"</span>, label=<span class="hljs-string">"Speak here..."</span>)
	gr.Interface(reverse_audio, mic, <span class="hljs-string">"audio"</span>).launch()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2urhun">上面的代码将生成一个如下所示的界面（如果你的浏览器没有要求你授权麦克风权限，请在 <a href="https://huggingface.co/spaces/course-demos/audio-reverse" target="_blank">新标签页中打开演示</a>。）</p> <iframe src="https://course-demos-audio-reverse.hf.space" frameborder="0" height="250" title="Gradio app" class="container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <p data-svelte-h="svelte-1fo3iq1">现在你可以录制你的声音并听到倒放的音频了 - 太神奇了 👻！</p> <h2 class="relative group"><a id="处理多个输入和输出" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#处理多个输入和输出"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> 
<span>处理多个输入和输出</span></h2> <p data-svelte-h="svelte-1kkn6le">假设我们有一个更复杂的预测函数，有多个输入和输出。在下面的示例中，我们有一个函数，它接收一个下拉框索引、一个滑块值和一个数字，并返回一个特定音调的音频样本。</p> <p data-svelte-h="svelte-rayjrw">让我们看看该如何传递输入和输出组件列表，看看你能不能理解他们。</p> <p data-svelte-h="svelte-fs669w">关键要传递：</p> <ul data-svelte-h="svelte-1spg63n"><li>输入组件列表，每个组件依次对应一个参数。</li> <li>输出组件列表，每个组件对应一个返回值。</li></ul> <p data-svelte-h="svelte-1osi2ri">下面的代码片段显示了三个输入组件如何与 <code>generate_tone()</code> 函数的三个参数一一对齐：</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
	<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr

	notes = [<span class="hljs-string">"C"</span>, <span class="hljs-string">"C#"</span>, <span class="hljs-string">"D"</span>, <span class="hljs-string">"D#"</span>, <span class="hljs-string">"E"</span>, <span class="hljs-string">"F"</span>, <span class="hljs-string">"F#"</span>, <span class="hljs-string">"G"</span>, <span class="hljs-string">"G#"</span>, <span class="hljs-string">"A"</span>, <span class="hljs-string">"A#"</span>, <span class="hljs-string">"B"</span>]


	<span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_tone</span>(<span class="hljs-params">note, octave, duration</span>):
	sr = <span class="hljs-number">48000</span>
	a4_freq, tones_from_a4 = <span class="hljs-number">440</span>, <span class="hljs-number">12</span> * (octave - <span class="hljs-number">4</span>) + (note - <span class="hljs-number">9</span>)
	frequency = a4_freq * <span class="hljs-number">2</span> ** (tones_from_a4 / <span class="hljs-number">12</span>)
	duration = <span class="hljs-built_in">int</span>(duration)
	audio = np.linspace(<span class="hljs-number">0</span>, duration, duration * sr)
	audio = (<span class="hljs-number">20000</span> * np.sin(audio * (<span class="hljs-number">2</span> * np.pi * frequency))).astype(np.int16)
	<span class="hljs-keyword">return</span> (sr, audio)


	gr.Interface(
	generate_tone,
	[
	gr.Dropdown(notes, <span class="hljs-built_in">type</span>=<span class="hljs-string">"index"</span>),
	gr.Slider(minimum=<span class="hljs-number">4</span>, maximum=<span class="hljs-number">6</span>, step=<span class="hljs-number">1</span>),
	gr.Textbox(<span class="hljs-built_in">type</span>=<span class="hljs-string">"number"</span>, value=<span class="hljs-number">1</span>, label=<span class="hljs-string">"Duration in seconds"</span>),
	],
	<span class="hljs-string">"audio"</span>,
	).launch()<!-- HTML_TAG_END --></pre></div> <iframe src="https://course-demos-generate-tone.hf.space" frameborder="0" height="450" title="Gradio app" class="container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <h3 class="relative group"><a id="launch-方法" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#launch-方法"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>launch() 方法</span></h3> <p data-svelte-h="svelte-i5i6pl">到目前为止，我们已经使用了 <code>launch()</code> 方法来启动界面，但是我们还没有真正讨论过它的作用。</p> <p data-svelte-h="svelte-1n427fw">默认情况下， <code>launch()</code> 方法将在本地运行一个 Web 服务器来启动演示。如果你在 Jupyter 或 Colab Notebook 中运行代码，那么 Gradio 会将演示 GUI 嵌入到 Notebook 中，以便你可以轻松使用它。</p> <p 
data-svelte-h="svelte-c1su3i">你可以通过不同的参数自定义 <code>launch()</code> 的行为：</p> <ul data-svelte-h="svelte-8gq6"><li><code>inline</code> —— 是否在 Python Notebook 中内联显示接口。</li> <li><code>inbrowser</code> —— 是否在默认浏览器的新标签页中自动打开演示页面。</li> <li><code>share</code> —— 是否在你的计算机上创建一个公开可共享的链接。有点像 Google Drive 的链接！</li></ul> <p data-svelte-h="svelte-bqa4yo">我们将在下一节中更详细地介绍 <code>share</code> 参数！</p> <h2 class="relative group"><a id="-让我们实践一下" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#-让我们实践一下"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>✏️ 让我们实践一下！</span></h2> <p data-svelte-h="svelte-sd6hz4">让我们构建一个演示语音识别模型的演示。为了让它变得有趣，我们将同时支持从麦克风实时录制或上传的文件。</p> <p data-svelte-h="svelte-lw56ip">像往常一样，我们将使用 🤗 Transformers 中的 <code>pipeline()</code> 函数加载我们的语音识别模型。如果你需要快速复习回顾，你可以返回第一章。接下来，我们将实现一个 <code>transcribe_audio()</code> 函数来处理音频并返回转录后的文本。最后，我们将把这个函数包装在一个 <code>Interface</code> 中，将输入的类型设置为 <code>Audio</code> 组件，将输出的类型设置为文本。汇总起来，这个演示的代码如下：</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 
ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
	<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr

	model = pipeline(<span class="hljs-string">"automatic-speech-recognition"</span>)


	<span class="hljs-keyword">def</span> <span class="hljs-title function_">transcribe_audio</span>(<span class="hljs-params">mic=<span class="hljs-literal">None</span>, file=<span class="hljs-literal">None</span></span>):
	<span class="hljs-keyword">if</span> mic <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>:
	audio = mic
	<span class="hljs-keyword">elif</span> file <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-literal">None</span>:
	audio = file
	<span class="hljs-keyword">else</span>:
	<span class="hljs-keyword">return</span> <span class="hljs-string">"You must either provide a mic recording or a file"</span>
	transcription = model(audio)[<span class="hljs-string">"text"</span>]
	<span class="hljs-keyword">return</span> transcription


	gr.Interface(
	fn=transcribe_audio,
	inputs=[
	gr.Audio(source=<span class="hljs-string">"microphone"</span>, <span class="hljs-built_in">type</span>=<span class="hljs-string">"filepath"</span>, optional=<span class="hljs-literal">True</span>),
	gr.Audio(source=<span class="hljs-string">"upload"</span>, <span class="hljs-built_in">type</span>=<span class="hljs-string">"filepath"</span>, optional=<span class="hljs-literal">True</span>),
	],
	outputs=<span class="hljs-string">"text"</span>,
	).launch()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6jj0k">如果你的浏览器没有要求你授权麦克风权限，请在<a href="https://huggingface.co/spaces/course-demos/asr" target="_blank">新标签页中打开演示</a>。</p> <iframe src="https://course-demos-asr.hf.space" frameborder="0" height="550" title="Gradio app" class="container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <p data-svelte-h="svelte-lvhf8i">就是这样！现在你可以使用这个界面来转录音频了。请注意，在这个例子中我们将 <code>optional</code> 参数设置为了 <code>True</code> ，这样用户可以提供麦克风的实时录音或音频文件中任意一种作为输入（或两者都不提供，但这将返回错误消息）。</p> <p data-svelte-h="svelte-1qe7s93">接下来，我们将学习如何与他人分享你的演示！</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/zh-CN/chapter9/3.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>

	<script>
	// Client bootstrap for this page: publishes the asset/base path config,
	// then dynamically imports the two entry modules and starts the app on
	// the element that contains this <script>.
	{
	// Assigned without a declaration, so in this classic (non-module) script it
	// becomes a property of the global object.
	// NOTE(review): presumably read by the imported entry modules — confirm.
	__sveltekit_9aawfw = {
	assets: "/docs/course/pr_1021/zh-CN",
	base: "/docs/course/pr_1021/zh-CN",
	env: {}
	};

	// Captured synchronously: document.currentScript is only set while this
	// script is executing, not later inside the promise callback below.
	const element = document.currentScript.parentElement;

	// Two entries, both null; NOTE(review): appears to be one slot per id in
	// node_ids below — confirm.
	const data = [null,null];

	// Load both entry modules in parallel; start only after both have resolved.
	Promise.all([
	import("/docs/course/pr_1021/zh-CN/_app/immutable/entry/start.f3a1a511.js"),
	import("/docs/course/pr_1021/zh-CN/_app/immutable/entry/app.c39e37cf.js")
	]).then(([kit, app]) => {
	// Hand the app module and the captured parent element to the runtime.
	kit.start(app, element, {
	// NOTE(review): likely refers to the nodes/0.*.js and nodes/72.*.js modules
	// that the page preloads via <link rel="modulepreload"> — confirm.
	node_ids: [0, 72],
	data,
	form: null,
	error: null
	});
	});
	}
	</script>

Xet Storage Details

Size:: 28.7 kB
Xet hash:: 6ccbdba874dd07a9fd1fd8e374a76c4419133c8b5c92f4d4aa5dcf96720f1403

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.