Buckets:

rtrm's picture
download
raw
57.6 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Create your own transcription app&quot;,&quot;local&quot;:&quot;create-your-own-transcription-app&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Create your transcription endpoint&quot;,&quot;local&quot;:&quot;create-your-transcription-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create your text generation endpoint&quot;,&quot;local&quot;:&quot;create-your-text-generation-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Test your endpoints&quot;,&quot;local&quot;:&quot;test-your-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Get your endpoint details&quot;,&quot;local&quot;:&quot;get-your-endpoint-details&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Building the transcription app&quot;,&quot;local&quot;:&quot;building-the-transcription-app&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Step 1: Set up dependencies and imports&quot;,&quot;local&quot;:&quot;step-1-set-up-dependencies-and-imports&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 2: Configure your endpoint connections&quot;,&quot;local&quot;:&quot;step-2-configure-your-endpoint-connections&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 3: Create the transcription function&quot;,&quot;local&quot;:&quot;step-3-create-the-transcription-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 4: Create the summarization function&quot;,&quot;local&quot;:&quot;step-4-create-the-summarization-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 5: Wrap it all together&quot;,&quot;local&quot;:&quot;step-5-wrap-it-all-together&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy your transcription app&quot;,&quot;local&quot;:&quot;deploy-your-transcription-app&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/inference-endpoints/pr_136/en/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/entry/start.fb9ab4d6.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/scheduler.f6b352c8.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/singletons.ceca4163.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/index.26cf6c5a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/paths.142cd5df.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/entry/app.6247727a.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/index.b90df637.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/nodes/0.2fcde12d.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/nodes/24.8e032ccd.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/Tip.366d2e6e.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/CodeBlock.e5718f9d.js">
<link rel="modulepreload" href="/docs/inference-endpoints/pr_136/en/_app/immutable/chunks/getInferenceSnippets.1e3ae0bf.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;Create your own transcription app&quot;,&quot;local&quot;:&quot;create-your-own-transcription-app&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Create your transcription endpoint&quot;,&quot;local&quot;:&quot;create-your-transcription-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Create your text generation endpoint&quot;,&quot;local&quot;:&quot;create-your-text-generation-endpoint&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Test your endpoints&quot;,&quot;local&quot;:&quot;test-your-endpoints&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Get your endpoint details&quot;,&quot;local&quot;:&quot;get-your-endpoint-details&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Building the transcription app&quot;,&quot;local&quot;:&quot;building-the-transcription-app&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;Step 1: Set up dependencies and imports&quot;,&quot;local&quot;:&quot;step-1-set-up-dependencies-and-imports&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 2: Configure your endpoint connections&quot;,&quot;local&quot;:&quot;step-2-configure-your-endpoint-connections&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 3: Create the transcription function&quot;,&quot;local&quot;:&quot;step-3-create-the-transcription-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 4: Create the summarization function&quot;,&quot;local&quot;:&quot;step-4-create-the-summarization-function&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;Step 5: Wrap it all together&quot;,&quot;local&quot;:&quot;step-5-wrap-it-all-together&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Deploy your transcription app&quot;,&quot;local&quot;:&quot;deploy-your-transcription-app&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Next steps&quot;,&quot;local&quot;:&quot;next-steps&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="create-your-own-transcription-app" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-your-own-transcription-app"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create your own transcription app</span></h1> <p data-svelte-h="svelte-11kksmv">This tutorial will guide you through building a complete transcription application using Hugging Face Inference Endpoints. 
We’ll create an app that can transcribe audio files and generate intelligent summaries with action items - perfect for meeting notes, interviews, or any audio content.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1kzcndf">This tutorial uses Python and Gradio, but you can adapt the approach to any language that can make HTTP requests. The models deployed on Inference Endpoints use standard APIs, so you can integrate them into web applications, mobile apps, or any other system.</p></div> <h2 class="relative group"><a id="create-your-transcription-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-your-transcription-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create your transcription endpoint</span></h2> <p data-svelte-h="svelte-z0skyq">First, we need to create an Inference Endpoint for audio transcription. 
We’ll use OpenAI’s Whisper model for high-quality speech recognition.</p> <p data-svelte-h="svelte-2crr1b">Start by navigating to the Inference Endpoints UI, and once you have logged in you should see a button for creating a new Inference Endpoint. Click the “New” button.</p> <p data-svelte-h="svelte-dnyg4"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/quick_start/1-new-button.png" alt="new-button"></p> <p data-svelte-h="svelte-4f7swo">From there you’ll be directed to the catalog. The Model Catalog consists of popular models which have tuned configurations to work as one-click deploys. You can filter by name, task, price of the hardware and much more.</p> <p data-svelte-h="svelte-fxecmn"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/quick_start/2-catalog.png" alt="catalog"></p> <p data-svelte-h="svelte-csv139">Search for “whisper” to find transcription models, or you can create a custom endpoint with <a href="https://huggingface.co/openai/whisper-large-v3" rel="nofollow">openai/whisper-large-v3</a>. This model provides excellent transcription quality for multiple languages and handles various audio formats.</p> <p data-svelte-h="svelte-lld6nd">For transcription models, we recommend:</p> <ul data-svelte-h="svelte-10gw77w"><li><strong>GPU</strong>: NVIDIA L4 or A10G for good performance with audio processing</li> <li><strong>Instance Size</strong>: x1 (sufficient for most transcription workloads)</li> <li><strong>Auto-scaling</strong>: Enable scale-to-zero to save costs when not in use</li></ul> <p data-svelte-h="svelte-1shrp82">Click “Create Endpoint” to deploy your transcription service.</p> <p data-svelte-h="svelte-1a0gqw5"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/transcriptions/config.png" alt="config"></p> <p data-svelte-h="svelte-r8kuit">Your endpoint will take about 5 minutes to initialize. 
Once it’s ready, you’ll see it in the “Running” state.</p> <h2 class="relative group"><a id="create-your-text-generation-endpoint" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#create-your-text-generation-endpoint"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Create your text generation endpoint</span></h2> <p data-svelte-h="svelte-13t63ie">Now let’s repeat the process for a text generation model. For generating summaries and action items, we’ll create a second endpoint using the <a href="https://huggingface.co/Qwen/Qwen3-1.7B" rel="nofollow">Qwen/Qwen3-1.7B</a> model.</p> <p data-svelte-h="svelte-6jil6r">Follow the same process:</p> <ol data-svelte-h="svelte-1vkmkhw"><li>Click the “New” button in the Inference Endpoints UI</li> <li>Search for <code>qwen3 1.7b</code> in the catalog</li> <li>The NVIDIA L4 with x1 instance size is recommended for this model</li> <li>Keep the default settings (scale-to-zero enabled, 1-hour timeout)</li> <li>Click “Create Endpoint”</li></ol> <p data-svelte-h="svelte-1hrp0qn">This model is optimized for text generation tasks and will provide excellent summarization capabilities. 
Both endpoints will take about 3-5 minutes to initialize.</p> <h2 class="relative group"><a id="test-your-endpoints" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#test-your-endpoints"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Test your endpoints</span></h2> <p data-svelte-h="svelte-o2p4m3">Once your endpoints are running, you can test them in the playground. 
The transcription endpoint will accept audio files and return text transcripts.</p> <p data-svelte-h="svelte-12e9kdf"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/transcriptions/playground.png" alt="playground"></p> <p data-svelte-h="svelte-1rqdtmf">Test with a short audio sample to verify the transcription quality.</p> <h2 class="relative group"><a id="get-your-endpoint-details" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#get-your-endpoint-details"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Get your endpoint details</span></h2> <p data-svelte-h="svelte-13oqpkl">You’ll need the endpoint details from your <a href="https://endpoints.huggingface.co/" rel="nofollow">endpoints page</a>:</p> <ul data-svelte-h="svelte-1mxq0ya"><li><strong>Base URL</strong>: <code>https://&lt;endpoint-name&gt;.endpoints.huggingface.cloud/v1/</code></li> <li><strong>Model name</strong>: The name of your endpoint</li> <li><strong>Token</strong>: Your HF token from <a href="https://huggingface.co/settings/tokens" rel="nofollow">settings</a></li></ul> <p 
data-svelte-h="svelte-2v5kr8"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/chatbot/endpoint-page.png" alt="endpoint-details"></p> <p data-svelte-h="svelte-5vve8m">You can validate your details by testing your endpoint out in the command line with curl.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->curl <span class="hljs-string">&quot;&lt;endpoint-url&gt;&quot;</span> \
-X POST \
--data-binary <span class="hljs-string">&#x27;@&lt;audio-file&gt;&#x27;</span> \
-H <span class="hljs-string">&quot;Accept: application/json&quot;</span> \
-H <span class="hljs-string">&quot;Content-Type: audio/flac&quot;</span> \
-H <span class="hljs-string">&quot;Authorization: Bearer $HF_TOKEN&quot;</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="building-the-transcription-app" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#building-the-transcription-app"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Building the transcription app</span></h2> <p data-svelte-h="svelte-1jgxvf2">Now let’s build a transcription application step by step. 
We’ll break it down into logical blocks to create a complete solution that can transcribe audio and generate intelligent summaries.</p> <h3 class="relative group"><a id="step-1-set-up-dependencies-and-imports" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-1-set-up-dependencies-and-imports"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 1: Set up dependencies and imports</span></h3> <p data-svelte-h="svelte-u4wya2">We’ll use the <code>requests</code> library to connect to both endpoints and <code>gradio</code> to create the interface. 
Let’s install the required packages:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install gradio requests<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16rd7fj">Then, set up your imports in a new Python file:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" 
transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">import</span> requests<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-2-configure-your-endpoint-connections" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-2-configure-your-endpoint-connections"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 2: Configure your endpoint connections</span></h3> <p data-svelte-h="svelte-14dcghr">Set up the configuration to connect to both your transcription and summarization endpoints based on the details you collected in the previous steps.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" 
transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Configuration for both endpoints</span>
TRANSCRIPTION_ENDPOINT = <span class="hljs-string">&quot;https://your-whisper-endpoint.endpoints.huggingface.cloud/api/v1/audio/transcriptions&quot;</span>
SUMMARIZATION_ENDPOINT = <span class="hljs-string">&quot;https://your-qwen-endpoint.endpoints.huggingface.cloud/v1/chat/completions&quot;</span>
HF_TOKEN = os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>) <span class="hljs-comment"># Your Hugging Face Hub token</span>
<span class="hljs-comment"># Headers for authentication</span>
headers = {
    <span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{HF_TOKEN}</span>&quot;</span>
}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vvhq4u">Your endpoints are now configured to handle both audio transcription and text summarization.</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1ene528">You might also want to use <code>os.getenv</code> for your endpoint details.</p></div> <h3 class="relative group"><a id="step-3-create-the-transcription-function" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-3-create-the-transcription-function"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 3: Create the transcription function</span></h3> <p data-svelte-h="svelte-llct1j">Next, we’ll create a function to handle audio file uploads and transcription:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 
text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">transcribe_audio</span>(<span class="hljs-params">audio_file_path</span>):
    <span class="hljs-string">&quot;&quot;&quot;Transcribe audio using direct requests to the endpoint&quot;&quot;&quot;</span>
    <span class="hljs-comment"># Read audio file and prepare for upload</span>
    <span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(audio_file_path, <span class="hljs-string">&quot;rb&quot;</span>) <span class="hljs-keyword">as</span> audio_file:
        <span class="hljs-comment"># Read the audio file as binary data and represent it as a file object</span>
        files = {<span class="hljs-string">&quot;file&quot;</span>: audio_file.read()}
    <span class="hljs-comment"># Make the request to the transcription endpoint</span>
    response = requests.post(TRANSCRIPTION_ENDPOINT, headers=headers, files=files)
    <span class="hljs-comment"># Check if the request was successful</span>
    <span class="hljs-keyword">if</span> response.status_code == <span class="hljs-number">200</span>:
        result = response.json()
        <span class="hljs-keyword">return</span> result.get(<span class="hljs-string">&quot;text&quot;</span>, <span class="hljs-string">&quot;No transcription available&quot;</span>)
    <span class="hljs-keyword">else</span>:
        <span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Error: <span class="hljs-subst">{response.status_code}</span> - <span class="hljs-subst">{response.text}</span>&quot;</span><!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1xad941">The transcription endpoint expects a file upload in the <code>files</code> parameter. Make sure to read the audio file as binary data and pass it correctly to the API.</p></div> <h3 class="relative group"><a id="step-4-create-the-summarization-function" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-4-create-the-summarization-function"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 4: Create the summarization function</span></h3> <p data-svelte-h="svelte-1afb0a0">Now we’ll create a function to generate summaries from the transcribed text. 
We’ll do some simple prompt engineering to get the best results.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_summary</span>(<span class="hljs-params">transcript</span>):
<span class="hljs-string">&quot;&quot;&quot;Generate summary using requests to the chat completions endpoint&quot;&quot;&quot;</span>
<span class="hljs-comment"># define a nice prompt to get the best results for our use case</span>
prompt = <span class="hljs-string">f&quot;&quot;&quot;
Analyze this meeting transcript and provide:
1. A concise summary of key points
2. Action items with responsible parties
3. Important decisions made
Transcript: <span class="hljs-subst">{transcript}</span>
Format with clear sections:
## Summary
## Action Items
## Decisions Made
&quot;&quot;&quot;</span>
<span class="hljs-comment"># Prepare the payload using the Messages API format</span>
payload = {
<span class="hljs-string">&quot;model&quot;</span>: <span class="hljs-string">&quot;your-qwen-endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint</span>
<span class="hljs-string">&quot;messages&quot;</span>: [{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: prompt}],
<span class="hljs-string">&quot;max_tokens&quot;</span>: <span class="hljs-number">1000</span>, <span class="hljs-comment"># we can also set a max_tokens parameter to limit the length of the response</span>
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>, <span class="hljs-comment"># we might want to set lower temperature for more deterministic results</span>
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">False</span> <span class="hljs-comment"># we don&#x27;t need streaming for this use case</span>
}
<span class="hljs-comment"># Headers for chat completions</span>
chat_headers = {
<span class="hljs-string">&quot;Accept&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Content-Type&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{HF_TOKEN}</span>&quot;</span>
}
<span class="hljs-comment"># Make the request</span>
response = requests.post(SUMMARIZATION_ENDPOINT, headers=chat_headers, json=payload)
response.raise_for_status()
<span class="hljs-comment"># Parse the response</span>
result = response.json()
<span class="hljs-keyword">return</span> result[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;message&quot;</span>][<span class="hljs-string">&quot;content&quot;</span>]<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="step-5-wrap-it-all-together" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#step-5-wrap-it-all-together"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Step 5: Wrap it all together</span></h3> <p data-svelte-h="svelte-xfdfb">Now let’s build our Gradio interface. 
We’ll use the <code>gr.Interface</code> class to create a simple interface that allows us to upload an audio file and see the transcript and summary.</p> <p data-svelte-h="svelte-1kwhso3">First, we’ll create a main processing function that handles the complete workflow.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">process_meeting_audio</span>(<span class="hljs-params">audio_file</span>):
<span class="hljs-string">&quot;&quot;&quot;Main processing function that handles the complete workflow&quot;&quot;&quot;</span>
<span class="hljs-keyword">if</span> audio_file <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
<span class="hljs-keyword">return</span> <span class="hljs-string">&quot;Please upload an audio file.&quot;</span>, <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-keyword">try</span>:
<span class="hljs-comment"># Step 1: Transcribe the audio</span>
transcript = transcribe_audio(audio_file)
<span class="hljs-comment"># Step 2: Generate summary from transcript</span>
summary = generate_summary(transcript)
<span class="hljs-keyword">return</span> transcript, summary
<span class="hljs-keyword">except</span> Exception <span class="hljs-keyword">as</span> e:
<span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Error processing audio: <span class="hljs-subst">{<span class="hljs-built_in">str</span>(e)}</span>&quot;</span>, <span class="hljs-string">&quot;&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-byxuh9">Then, we can run that function in a Gradio interface. We’ll add some descriptions and a title to make it more user-friendly.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-comment"># Create Gradio interface</span>
app = gr.Interface(
fn=process_meeting_audio,
inputs=gr.Audio(label=<span class="hljs-string">&quot;Upload Meeting Audio&quot;</span>, <span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;filepath&quot;</span>),
outputs=[
gr.Textbox(label=<span class="hljs-string">&quot;Full Transcript&quot;</span>, lines=<span class="hljs-number">10</span>),
gr.Textbox(label=<span class="hljs-string">&quot;Meeting Summary&quot;</span>, lines=<span class="hljs-number">8</span>),
],
title=<span class="hljs-string">&quot;🎤 AI Meeting Notes&quot;</span>,
description=<span class="hljs-string">&quot;Upload audio to get instant transcripts and summaries.&quot;</span>,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p0i73e">That’s it! You can now run the app locally with <code>python app.py</code> and test it out.</p> <details><summary data-svelte-h="svelte-1ri4alz">Click to view the complete script</summary> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> gradio <span class="hljs-keyword">as</span> gr
<span class="hljs-keyword">import</span> os
<span class="hljs-keyword">import</span> requests
<span class="hljs-comment"># Configuration for both endpoints</span>
TRANSCRIPTION_ENDPOINT = <span class="hljs-string">&quot;https://your-whisper-endpoint.endpoints.huggingface.cloud/api/v1/audio/transcriptions&quot;</span>
SUMMARIZATION_ENDPOINT = <span class="hljs-string">&quot;https://your-qwen-endpoint.endpoints.huggingface.cloud/v1/chat/completions&quot;</span>
HF_TOKEN = os.getenv(<span class="hljs-string">&quot;HF_TOKEN&quot;</span>) <span class="hljs-comment"># Your Hugging Face Hub token</span>
<span class="hljs-comment"># Headers for authentication</span>
headers = {
<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{HF_TOKEN}</span>&quot;</span>
}
<span class="hljs-keyword">def</span> <span class="hljs-title function_">transcribe_audio</span>(<span class="hljs-params">audio_file_path</span>):
<span class="hljs-string">&quot;&quot;&quot;Transcribe audio using direct requests to the endpoint&quot;&quot;&quot;</span>
<span class="hljs-comment"># Read audio file and prepare for upload</span>
<span class="hljs-keyword">with</span> <span class="hljs-built_in">open</span>(audio_file_path, <span class="hljs-string">&quot;rb&quot;</span>) <span class="hljs-keyword">as</span> audio_file:
files = {<span class="hljs-string">&quot;file&quot;</span>: audio_file.read()}
<span class="hljs-comment"># Make the request to the transcription endpoint</span>
response = requests.post(TRANSCRIPTION_ENDPOINT, headers=headers, files=files)
<span class="hljs-keyword">if</span> response.status_code == <span class="hljs-number">200</span>:
result = response.json()
<span class="hljs-keyword">return</span> result.get(<span class="hljs-string">&quot;text&quot;</span>, <span class="hljs-string">&quot;No transcription available&quot;</span>)
<span class="hljs-keyword">else</span>:
<span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Error: <span class="hljs-subst">{response.status_code}</span> - <span class="hljs-subst">{response.text}</span>&quot;</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">generate_summary</span>(<span class="hljs-params">transcript</span>):
<span class="hljs-string">&quot;&quot;&quot;Generate summary using requests to the chat completions endpoint&quot;&quot;&quot;</span>
prompt = <span class="hljs-string">f&quot;&quot;&quot;
Analyze this meeting transcript and provide:
1. A concise summary of key points
2. Action items with responsible parties
3. Important decisions made
Transcript: <span class="hljs-subst">{transcript}</span>
Format with clear sections:
## Summary
## Action Items
## Decisions Made
&quot;&quot;&quot;</span>
<span class="hljs-comment"># Prepare the payload using the Messages API format</span>
payload = {
<span class="hljs-string">&quot;model&quot;</span>: <span class="hljs-string">&quot;your-qwen-endpoint-name&quot;</span>, <span class="hljs-comment"># Use the name of your endpoint</span>
<span class="hljs-string">&quot;messages&quot;</span>: [{<span class="hljs-string">&quot;role&quot;</span>: <span class="hljs-string">&quot;user&quot;</span>, <span class="hljs-string">&quot;content&quot;</span>: prompt}],
<span class="hljs-string">&quot;max_tokens&quot;</span>: <span class="hljs-number">1000</span>,
<span class="hljs-string">&quot;temperature&quot;</span>: <span class="hljs-number">0.7</span>,
<span class="hljs-string">&quot;stream&quot;</span>: <span class="hljs-literal">False</span>
}
<span class="hljs-comment"># Headers for chat completions</span>
chat_headers = {
<span class="hljs-string">&quot;Accept&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Content-Type&quot;</span>: <span class="hljs-string">&quot;application/json&quot;</span>,
<span class="hljs-string">&quot;Authorization&quot;</span>: <span class="hljs-string">f&quot;Bearer <span class="hljs-subst">{HF_TOKEN}</span>&quot;</span>
}
<span class="hljs-comment"># Make the request</span>
response = requests.post(SUMMARIZATION_ENDPOINT, headers=chat_headers, json=payload)
response.raise_for_status()
<span class="hljs-comment"># Parse the response</span>
result = response.json()
<span class="hljs-keyword">return</span> result[<span class="hljs-string">&quot;choices&quot;</span>][<span class="hljs-number">0</span>][<span class="hljs-string">&quot;message&quot;</span>][<span class="hljs-string">&quot;content&quot;</span>]
<span class="hljs-keyword">def</span> <span class="hljs-title function_">process_meeting_audio</span>(<span class="hljs-params">audio_file</span>):
<span class="hljs-string">&quot;&quot;&quot;Main processing function that handles the complete workflow&quot;&quot;&quot;</span>
<span class="hljs-keyword">if</span> audio_file <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>:
<span class="hljs-keyword">return</span> <span class="hljs-string">&quot;Please upload an audio file.&quot;</span>, <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-keyword">try</span>:
<span class="hljs-comment"># Step 1: Transcribe the audio</span>
transcript = transcribe_audio(audio_file)
<span class="hljs-comment"># Step 2: Generate summary from transcript</span>
summary = generate_summary(transcript)
<span class="hljs-keyword">return</span> transcript, summary
<span class="hljs-keyword">except</span> Exception <span class="hljs-keyword">as</span> e:
<span class="hljs-keyword">return</span> <span class="hljs-string">f&quot;Error processing audio: <span class="hljs-subst">{<span class="hljs-built_in">str</span>(e)}</span>&quot;</span>, <span class="hljs-string">&quot;&quot;</span>
<span class="hljs-comment"># Create Gradio interface</span>
app = gr.Interface(
fn=process_meeting_audio,
inputs=gr.Audio(label=<span class="hljs-string">&quot;Upload Meeting Audio&quot;</span>, <span class="hljs-built_in">type</span>=<span class="hljs-string">&quot;filepath&quot;</span>),
outputs=[
gr.Textbox(label=<span class="hljs-string">&quot;Full Transcript&quot;</span>, lines=<span class="hljs-number">10</span>),
gr.Textbox(label=<span class="hljs-string">&quot;Meeting Summary&quot;</span>, lines=<span class="hljs-number">8</span>),
],
title=<span class="hljs-string">&quot;🎤 AI Meeting Notes&quot;</span>,
description=<span class="hljs-string">&quot;Upload audio to get instant transcripts and summaries.&quot;</span>,
)
<span class="hljs-keyword">if</span> __name__ == <span class="hljs-string">&quot;__main__&quot;</span>:
app.launch()<!-- HTML_TAG_END --></pre></div></details> <p data-svelte-h="svelte-p5c6xj"><img src="https://raw.githubusercontent.com/huggingface/hf-endpoints-documentation/main/assets/tutorials/transcriptions/app.png" alt="app"></p> <h2 class="relative group"><a id="deploy-your-transcription-app" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#deploy-your-transcription-app"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Deploy your transcription app</span></h2> <p data-svelte-h="svelte-6mo49f">Now, let’s deploy it to Hugging Face Spaces so everyone can use it!</p> <ol data-svelte-h="svelte-1rcpgt7"><li><strong>Create a new Space</strong>: Go to <a href="https://huggingface.co/new-space" rel="nofollow">huggingface.co/new-space</a></li> <li><strong>Choose Gradio SDK</strong> and make it public</li> <li><strong>Upload your files</strong>: Upload <code>app.py</code> and any requirements</li> <li><strong>Add your token</strong>: In Space settings, add <code>HF_TOKEN</code> as a secret</li> <li><strong>Configure hardware</strong>: Consider GPU for faster processing</li> <li><strong>Launch</strong>: Your app will be live at 
<code>https://huggingface.co/spaces/your-username/your-space-name</code></li></ol> <p data-svelte-h="svelte-1hlivpl">Your transcription app is now ready to handle meeting notes, interviews, podcasts, and any other audio content that needs to be transcribed and summarized!</p> <h2 class="relative group"><a id="next-steps" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#next-steps"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Next steps</span></h2> <p data-svelte-h="svelte-e08du4">Great work! 
You’ve now built a complete transcription application with intelligent summarization.</p> <p data-svelte-h="svelte-8g6jer">Here are some ways to extend your transcription app:</p> <ul data-svelte-h="svelte-gv9avz"><li><strong>Multi-language support</strong>: Add language detection and support for multiple languages</li> <li><strong>Speaker identification</strong>: Use a model from the Hub with speaker diarization capabilities</li> <li><strong>Custom prompts</strong>: Allow users to customize the summary format and style</li> <li><strong>Implement Text-to-Speech</strong>: Use a model from the Hub to convert your summary into an audio file</li></ul> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/hf-endpoints-documentation/blob/main/docs/source/tutorials/transcription.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
// Client bootstrap for this page: publishes path configuration on a global,
// then loads the two entry bundles and starts the app on the element that
// contains this <script> tag.
{
// Assigned without var/let/const, so in this classic (non-module) script the
// name is created on the global object rather than being scoped to the block.
// NOTE(review): presumably read back by the imported bundles — confirm.
__sveltekit_1q0n26o = {
// assets and base are set to the same URL prefix here.
assets: "/docs/inference-endpoints/pr_136/en",
base: "/docs/inference-endpoints/pr_136/en",
env: {}
};
// Read synchronously: document.currentScript is only set while this script is
// executing, so it must be captured before the asynchronous callback below.
const element = document.currentScript.parentElement;
// Two null entries, forwarded unchanged to kit.start as `data`.
// NOTE(review): looks like one slot per entry in node_ids — confirm.
const data = [null,null];
// Load both entry modules in parallel; start only once both have resolved.
Promise.all([
import("/docs/inference-endpoints/pr_136/en/_app/immutable/entry/start.fb9ab4d6.js"),
import("/docs/inference-endpoints/pr_136/en/_app/immutable/entry/app.6247727a.js")
]).then(([kit, app]) => {
// kit is the module namespace of start.*.js, app that of app.*.js.
kit.start(app, element, {
// NOTE(review): presumably the route node indices to hydrate — confirm.
node_ids: [0, 24],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
57.6 kB
·
Xet hash:
b7327957377096104d657fd0a373e5928660a42f92b7e745b5fbc9c064c32e72

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.