Buckets:

rtrm's picture
download
raw
199 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;提取文本摘要&quot;,&quot;local&quot;:&quot;提取文本摘要&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;準備多語言語料庫&quot;,&quot;local&quot;:&quot;準備多語言語料庫&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文本摘要模型&quot;,&quot;local&quot;:&quot;文本摘要模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;預處理數據&quot;,&quot;local&quot;:&quot;預處理數據&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文本摘要的指標&quot;,&quot;local&quot;:&quot;文本摘要的指標&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;創建強大的baseline&quot;,&quot;local&quot;:&quot;創建強大的baseline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Trainer API微調mT5&quot;,&quot;local&quot;:&quot;使用-trainer-api微調mt5&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Keras API微調mT5&quot;,&quot;local&quot;:&quot;使用-keras-api微調mt5&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 🤗 Accelerate 微調 mT5&quot;,&quot;local&quot;:&quot;使用--accelerate-微調-mt5&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;為訓練做好一切準備&quot;,&quot;local&quot;:&quot;為訓練做好一切準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;訓練循環&quot;,&quot;local&quot;:&quot;訓練循環&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用您微調的模型&quot;,&quot;local&quot;:&quot;使用您微調的模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/course/pr_1069/zh-TW/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/entry/start.01945326.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/scheduler.37c15a92.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/singletons.40763523.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/index.18351ede.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/paths.677dc4ce.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/entry/app.e8597ff2.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/index.2bf4358c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/nodes/0.decbb3b3.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/nodes/58.63c8d41f.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/Tip.363c041f.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/Youtube.1e50a667.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/CodeBlock.4e987730.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/DocNotebookDropdown.efc1fb7c.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/FrameworkSwitchCourse.8d4d4ab6.js">
<link rel="modulepreload" href="/docs/course/pr_1069/zh-TW/_app/immutable/chunks/getInferenceSnippets.24b50994.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;提取文本摘要&quot;,&quot;local&quot;:&quot;提取文本摘要&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;準備多語言語料庫&quot;,&quot;local&quot;:&quot;準備多語言語料庫&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文本摘要模型&quot;,&quot;local&quot;:&quot;文本摘要模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;預處理數據&quot;,&quot;local&quot;:&quot;預處理數據&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文本摘要的指標&quot;,&quot;local&quot;:&quot;文本摘要的指標&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;創建強大的baseline&quot;,&quot;local&quot;:&quot;創建強大的baseline&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Trainer API微調mT5&quot;,&quot;local&quot;:&quot;使用-trainer-api微調mt5&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 Keras API微調mT5&quot;,&quot;local&quot;:&quot;使用-keras-api微調mt5&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用 🤗 Accelerate 微調 mT5&quot;,&quot;local&quot;:&quot;使用--accelerate-微調-mt5&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;為訓練做好一切準備&quot;,&quot;local&quot;:&quot;為訓練做好一切準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;訓練循環&quot;,&quot;local&quot;:&quot;訓練循環&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;使用您微調的模型&quot;,&quot;local&quot;:&quot;使用您微調的模型&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="bg-white leading-none border border-gray-100 rounded-lg flex p-0.5 w-56 text-sm mb-4"><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-l bg-red-50 dark:bg-transparent text-red-600" href="?fw=pt"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> Pytorch </a><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-r text-gray-500 filter grayscale" href="?fw=tf"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.94em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 274"><path d="M145.726 42.065v42.07l72.861 42.07v-42.07l-72.86-42.07zM0 84.135v42.07l36.43 21.03V105.17L0 84.135zm109.291 21.035l-36.43 21.034v126.2l36.43 21.035v-84.135l36.435 21.035v-42.07l-36.435-21.034V105.17z" fill="#E55B2D"></path><path d="M145.726 42.065L36.43 105.17v42.065l72.861-42.065v42.065l36.435-21.03v-84.14zM255.022 63.1l-36.435 21.035v42.07l36.435-21.035V63.1zm-72.865 84.135l-36.43 21.035v42.07l36.43-21.036v-42.07zm-36.43 63.104l-36.436-21.035v84.135l36.435-21.035V210.34z" fill="#ED8E24"></path><path d="M145.726 0L0 84.135l36.43 21.035l109.296-63.105l72.861 42.07L255.022 63.1L145.726 0zm0 126.204l-36.435 21.03l36.435 21.036l36.43-21.035l-36.43-21.03z" fill="#F8BF3C"></path></svg> TensorFlow </a></div> <h1 class="relative group"><a id="提取文本摘要" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#提取文本摘要"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>提取文本摘要</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/chapter7/section5_pt.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/chapter7/section5_pt.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <p data-svelte-h="svelte-19rxt6u">在本節中,我們將看看如何使用 Transformer 模型將長文檔壓縮為摘要,這項任務稱為文本摘要.這是最具挑戰性的 NLP 任務之一,因為它需要一系列能力,例如理解長篇文章和生成能夠捕捉文檔中主要主題的連貫文本。但是,如果做得好,文本摘要是一種強大的工具,可以減輕領域專家詳細閱讀長文檔的負擔,從而加快各種業務流程。</p> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/yHnr5Dk2zCI" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-1oslg25">儘管在<a href="https://huggingface.co/models?pipeline_tag=summarization=downloads" rel="nofollow">Hugging Face Hub</a>上已經存在各種微調模型用於文本摘要,幾乎所有這些都只適用於英文文檔。因此,為了在本節中添加一些變化,我們將為英語和西班牙語訓練一個雙語模型。在本節結束時,您將有一個可以總結客戶評論的<a href="https://huggingface.co/huggingface-course/mt5-small-finetuned-amazon-en-es" rel="nofollow">模型</a></p> <iframe src="https://course-demos-mt5-small-finetuned-amazon-en-es.hf.space" frameborder="0" height="400" title="Gradio app" class="block dark:hidden container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <iframe src="https://course-demos-mt5-small-finetuned-amazon-en-es-darkmode.hf.space" frameborder="0" height="400" title="Gradio app" class="hidden dark:block container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <p data-svelte-h="svelte-1mp60xs">如下所示:正如我們將看到的,這些摘要很簡潔,因為它們是從客戶在產品評論中提供的標題中學到的。讓我們首先為這項任務準備一個合適的雙語語料庫。</p> <h2 class="relative group"><a id="準備多語言語料庫" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#準備多語言語料庫"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>準備多語言語料庫</span></h2> <p data-svelte-h="svelte-iqutbd">我們將使用<a href="https://huggingface.co/datasets/amazon_reviews_multi" rel="nofollow">多語言亞馬遜評論語料庫</a>創建我們的雙語摘要器。該語料庫由六種語言的亞馬遜產品評論組成,通常用於對多語言分類器進行基準測試。然而,由於每條評論都附有一個簡短的標題,我們可以使用標題作為我們模型學習的目標摘要!首先,讓我們從 Hugging Face Hub 下載英語和西班牙語子集:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
spanish_dataset = load_dataset(<span class="hljs-string">&quot;amazon_reviews_multi&quot;</span>, <span class="hljs-string">&quot;es&quot;</span>)
english_dataset = load_dataset(<span class="hljs-string">&quot;amazon_reviews_multi&quot;</span>, <span class="hljs-string">&quot;en&quot;</span>)
english_dataset<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">200000</span>
})
validation: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">5000</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">5000</span>
})
})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ad4cuj">如您所見,對於每種語言,都有 200,000 條評論 <strong>train</strong> 拆分,每個評論有 5,000 條評論 <strong>validation</strong><strong>test</strong> 分裂。我們感興趣的評論信息包含在 <strong>review_body</strong><strong>review_title</strong> 列。讓我們通過創建一個簡單的函數來查看一些示例,該函數使用我們在<a href="/course/chapter5">第五章</a>學到過:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">show_samples</span>(<span class="hljs-params">dataset, num_samples=<span class="hljs-number">3</span>, seed=<span class="hljs-number">42</span></span>):
sample = dataset[<span class="hljs-string">&quot;train&quot;</span>].shuffle(seed=seed).select(<span class="hljs-built_in">range</span>(num_samples))
<span class="hljs-keyword">for</span> example <span class="hljs-keyword">in</span> sample:
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt; Title: <span class="hljs-subst">{example[<span class="hljs-string">&#x27;review_title&#x27;</span>]}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&#x27;&gt;&gt; Review: <span class="hljs-subst">{example[<span class="hljs-string">&#x27;review_body&#x27;</span>]}</span>&#x27;&quot;</span>)
show_samples(english_dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: Worked in front position, not rear&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: 3 stars because these are not rear brakes as stated in the item description. At least the mount adapter only worked on the front fork of the bike that I got it for.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: meh&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Does it’s job and it’s gorgeous but mine is falling apart, I had to basically put it together again with hot glue&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Can\&#x27;t beat these for the money&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Bought this for handling miscellaneous aircraft parts and hanger &quot;stuff&quot; that I needed to organize; it really fit the bill. The unit arrived quickly, was well packaged and arrived intact (always a good sign). There are five wall mounts-- three on the top and two on the bottom. I wanted to mount it on the wall, so all I had to do was to remove the top two layers of plastic drawers, as well as the bottom corner drawers, place it when I wanted and mark it; I then used some of the new plastic screw in wall anchors (the 50 pound variety) and it easily mounted to the wall. Some have remarked that they wanted dividers for the drawers, and that they made those. Good idea. My application was that I needed something that I can see the contents at about eye level, so I wanted the fuller-sized drawers. I also like that these are the new plastic that doesn\&#x27;t get brittle and split like my older plastic drawers did. I like the all-plastic construction. It\&#x27;s heavy duty enough to hold metal parts, but being made of plastic it\&#x27;s not as heavy as a metal frame, so you can easily mount it to the wall and still load it up with heavy stuff, or light stuff. No problem there. For the money, you can\&#x27;t beat it. Best one of these I\&#x27;ve bought to date-- and I\&#x27;ve been using some version of these for over forty years.&#x27;</span><!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-dnykow">✏️ <strong>試試看!</strong> 更改 <code>Dataset.shuffle()</code> 命令中的隨機種子以探索語料庫中的其他評論。 如果您是說西班牙語的人,請查看 <code>spanish_dataset</code> 中的一些評論,看看標題是否也像合理的摘要。</p></div> <p data-svelte-h="svelte-18zemtu">此示例顯示了人們通常在網上找到的評論的多樣性,從正面到負面(以及介於兩者之間的所有內容!)。儘管標題為“meh”的示例信息量不大,但其他標題看起來像是對評論本身的體面總結。在單個 GPU 上訓練所有 400,000 條評論的摘要模型將花費太長時間,因此我們將專注於為單個產品領域生成摘要。為了瞭解我們可以選擇哪些域,讓我們將 <strong>english_dataset</strong> 轉換到 <strong>pandas.DataFrame</strong> 並計算每個產品類別的評論數量:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->english_dataset.set_format(<span class="hljs-string">&quot;pandas&quot;</span>)
english_df = english_dataset[<span class="hljs-string">&quot;train&quot;</span>][:]
<span class="hljs-comment"># Show counts for top 20 products</span>
english_df[<span class="hljs-string">&quot;product_category&quot;</span>].value_counts()[:<span class="hljs-number">20</span>]<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->home <span class="hljs-number">17679</span>
apparel <span class="hljs-number">15951</span>
wireless <span class="hljs-number">15717</span>
other <span class="hljs-number">13418</span>
beauty <span class="hljs-number">12091</span>
drugstore <span class="hljs-number">11730</span>
kitchen <span class="hljs-number">10382</span>
toy <span class="hljs-number">8745</span>
sports <span class="hljs-number">8277</span>
automotive <span class="hljs-number">7506</span>
lawn_and_garden <span class="hljs-number">7327</span>
home_improvement <span class="hljs-number">7136</span>
pet_products <span class="hljs-number">7082</span>
digital_ebook_purchase <span class="hljs-number">6749</span>
pc <span class="hljs-number">6401</span>
electronics <span class="hljs-number">6186</span>
office_product <span class="hljs-number">5521</span>
shoes <span class="hljs-number">5197</span>
grocery <span class="hljs-number">4730</span>
book <span class="hljs-number">3756</span>
Name: product_category, dtype: int64<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pylu96">英語數據集中最受歡迎的產品是家居用品、服裝和無線電子產品。不過,為了堅持亞馬遜的主題,讓我們專注於總結書籍的評論——畢竟,這是亞馬遜這家公司成立的基礎!我們可以看到兩個符合要求的產品類別( <strong>book</strong><strong>digital_ebook_purchase</strong> ),所以讓我們為這些產品過濾兩種語言的數據集。正如我們在<a href="/course/chapter5">第五章</a>學到的, 這 <strong>Dataset.filter()</strong> 函數允許我們非常有效地對數據集進行切片,因此我們可以定義一個簡單的函數來執行此操作:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">filter_books</span>(<span class="hljs-params">example</span>):
<span class="hljs-keyword">return</span> (
example[<span class="hljs-string">&quot;product_category&quot;</span>] == <span class="hljs-string">&quot;book&quot;</span>
<span class="hljs-keyword">or</span> example[<span class="hljs-string">&quot;product_category&quot;</span>] == <span class="hljs-string">&quot;digital_ebook_purchase&quot;</span>
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14rncl5">現在,當我們將此函數應用於 <strong>english_dataset</strong><strong>spanish_dataset</strong> ,結果將只包含涉及書籍類別的那些行。在應用過濾器之前,讓我們將<strong>english_dataset</strong>的格式從 <strong>pandas</strong> 切換回到 <strong>arrow</strong></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->english_dataset.reset_format()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1sksjg">然後我們可以應用過濾器功能,作為健全性檢查,讓我們檢查評論樣本,看看它們是否確實與書籍有關:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->spanish_books = spanish_dataset.<span class="hljs-built_in">filter</span>(filter_books)
english_books = english_dataset.<span class="hljs-built_in">filter</span>(filter_books)
show_samples(english_books)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: I\&#x27;m dissapointed.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I guess I had higher expectations for this book from the reviews. I really thought I\&#x27;d at least like it. The plot idea was great. I loved Ash but, it just didnt go anywhere. Most of the book was about their radio show and talking to callers. I wanted the author to dig deeper so we could really get to know the characters. All we know about Grace is that she is attractive looking, Latino and is kind of a brat. I\&#x27;m dissapointed.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Good art, good price, poor design&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I had gotten the DC Vintage calendar the past two years, but it was on backorder forever this year and I saw they had shrunk the dimensions for no good reason. This one has good art choices but the design has the fold going through the picture, so it\&#x27;s less aesthetically pleasing, especially if you want to keep a picture to hang. For the price, a good calendar&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Helpful&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Nearly all the tips useful and. I consider myself an intermediate to advanced user of OneNote. I would highly recommend.&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9lfxiy">好的,我們可以看到評論並不是嚴格意義上的書籍,可能是指日曆和 OneNote 等電子應用程序等內容。儘管如此,該領域似乎適合訓練摘要模型。在我們查看適合此任務的各種模型之前,我們還有最後一點數據準備要做:將英語和西班牙語評論合併為一個 <strong>DatasetDict</strong> 目的。 🤗 Datasets 提供了一個方便的 <strong>concatenate_datasets()</strong> 函數(顧名思義)合併 <strong>Dataset</strong> 對象。因此,為了創建我們的雙語數據集,我們將遍歷每個拆分,連接該拆分的數據集,並打亂結果以確保我們的模型不會過度擬合單一語言:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> concatenate_datasets, DatasetDict
books_dataset = DatasetDict()
<span class="hljs-keyword">for</span> split <span class="hljs-keyword">in</span> english_books.keys():
books_dataset[split] = concatenate_datasets(
[english_books[split], spanish_books[split]]
)
books_dataset[split] = books_dataset[split].shuffle(seed=<span class="hljs-number">42</span>)
<span class="hljs-comment"># Peek at a few examples</span>
show_samples(books_dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: Easy to follow!!!!&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I loved The dash diet weight loss Solution. Never hungry. I would recommend this diet. Also the menus are well rounded. Try it. Has lots of the information need thanks.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: PARCIALMENTE DAÑADO&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Me llegó el día que tocaba, junto a otros libros que pedí, pero la caja llegó en mal estado lo cual dañó las esquinas de los libros porque venían sin protección (forro).&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: no lo he podido descargar&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: igual que el anterior&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-158xy6s">這當然看起來像是英語和西班牙語評論的混合!現在我們有了一個訓練語料庫,最後要檢查的一件事是評論中單詞的分佈及其標題。這對於摘要任務尤其重要,其中數據中的簡短參考摘要會使模型偏向於僅在生成的摘要中輸出一兩個單詞。下面的圖顯示了單詞分佈,我們可以看到有些標題嚴重偏向於 1-2 個單詞:</p> <div class="flex justify-center" data-svelte-h="svelte-1pwnbfd"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/review-lengths.svg" alt="Word count distributions for the review titles and texts."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/review-lengths-dark.svg" alt="Word count distributions for the review titles and texts."></div> <p data-svelte-h="svelte-w38ny3">為了解決這個問題,我們將過濾掉標題非常短的示例,以便我們的模型可以生成更有趣的摘要。由於我們正在處理英文和西班牙文文本,因此我們可以使用粗略的啟發式方法在空白處拆分標題,然後使用我們可信賴的 <strong>Dataset.filter()</strong> 方法如下:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->books_dataset = books_dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;review_title&quot;</span>].split()) &gt; <span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qh2yc1">現在我們已經準備好了我們的語料庫,讓我們來看看一些可以對其進行微調的可能的 Transformer 模型!</p> <h2 class="relative group"><a id="文本摘要模型" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#文本摘要模型"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>文本摘要模型</span></h2> <p data-svelte-h="svelte-1yg4ppq">如果你仔細想想,文本摘要是一種類似於機器翻譯的任務:我們有一個像評論這樣的文本正文,我們希望將其“翻譯”成一個較短的版本,以捕捉輸入的顯著特徵。因此,大多數用於文本摘要的 Transformer 模型採用了我們在<a href="/course/chapter1">第一章</a>遇到的編碼器-解碼器架構。儘管有一些例外,例如 GPT 系列模型,它們在few-shot(少量微調)之後也可以提取摘要。下表列出了一些流行的預訓練模型,可以對其進行微調以進行彙總。</p> <table data-svelte-h="svelte-du121x"><thead><tr><th align="center">Transformer 模型</th> <th>描述</th> <th align="center">多種語言?</th></tr></thead> <tbody><tr><td align="center"><a href="https://huggingface.co/gpt2-xl" rel="nofollow">GPT-2</a></td> <td>雖然訓練為自迴歸語言模型,但您可以通過在輸入文本末尾附加“TL;DR”來使 GPT-2 生成摘要。</td> <td align="center"></td></tr> <tr><td align="center"><a href="https://huggingface.co/google/pegasus-large" rel="nofollow">PEGASUS</a></td> <td>在預訓練是的目標是來預測多句子文本中的屏蔽句子。 這個預訓練目標比普通語言建模更接近文本摘要,並且在流行的基準測試中得分很高。</td> <td align="center"></td></tr> <tr><td align="center"><a href="https://huggingface.co/t5-base" rel="nofollow">T5</a></td> <td>通用的 Transformer 架構,在文本到文本的框架中制定所有任務; 例如,模型文本摘要的輸入格式是<code>summarize: ARTICLE</code></td> <td align="center"></td></tr> <tr><td align="center"><a href="https://huggingface.co/google/mt5-base" rel="nofollow">mT5</a></td> <td>T5 的多語言版本,在多語言 Common Crawl 語料庫 (mC4) 上進行預訓練,涵蓋 101 種語言。</td> <td align="center"></td></tr> <tr><td align="center"><a href="https://huggingface.co/facebook/bart-base" rel="nofollow">BART</a></td> <td>一種新穎的 Transformer 架構,其中包含經過訓練的編碼器和解碼器堆棧,以重建被破壞的輸入,結合了 BERT 和 GPT-2 的預訓練方案。</td> <td align="center"></td></tr> <tr><td align="center"><a href="https://huggingface.co/facebook/mbart-large-50" rel="nofollow">mBART-50</a></td> <td>BART 的多語言版本,預訓練了 50 種語言。</td> <td align="center"></td></tr></tbody></table> <p data-svelte-h="svelte-1m6yd2o">從此表中可以看出,大多數用於摘要的 Transformer 模型(以及大多數 NLP 任務)都是單語的。如果您的任務是使用“有大量語料庫”的語言(如英語或德語),這很好,但對於世界各地正在使用的數千種其他語言,則不然。幸運的是,有一類多語言 Transformer 模型,如 mT5 和 mBART,可以解決問題。這些模型是使用語言建模進行預訓練的,但有一點不同:它們不是在一種語言的語料庫上訓練,而是同時在 50 多種語言的文本上進行聯合訓練!</p> <p data-svelte-h="svelte-9k2jla">我們將使用 mT5,這是一種基於 T5 的有趣架構,在文本到文本框架中進行了預訓練。在 T5 中,每個 NLP 任務都是根據提示前綴來制定的,例如 <strong>summarize:</strong> 這使模型使生成的文本適應提示。如下圖所示,這讓 T5 變得非常通用,因為你可以用一個模型解決很多任務!</p> <div class="flex justify-center" data-svelte-h="svelte-k7lnur"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/t5.svg" alt="Different tasks performed by the T5 architecture."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/t5-dark.svg" alt="Different tasks performed by the T5 architecture."></div> <p data-svelte-h="svelte-d4pwwt">mT5 不使用前綴,但具有 T5 的大部分功能,並且具有多語言的優勢。現在我們已經選擇了一個模型,讓我們來看看準備我們的訓練數據。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-a0f3vx">✏️ <strong>試試看!</strong> 完成本節後,通過使用相同的技術對 mBART 進行微調,看看 mT5 與 mBART 相比有多好。 對於獎勵積分,您還可以嘗試僅在英文評論上微調 T5。 由於 T5 需要一個特殊的前綴提示,因此您需要在下面的預處理步驟中將“summarize:”添加到輸入示例中。</p></div> <h2 class="relative group"><a id="預處理數據" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#預處理數據"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>預處理數據</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/1m7BerpSq8A" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-15tecul">我們的下一個任務是對我們的評論及其標題進行標記和編碼。像往常一樣,我們首先加載與預訓練模型檢查點相關的標記器。我們將使用 <strong>mt5-small</strong> 作為我們的檢查點,以便我們可以在合理的時間內微調模型:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
model_checkpoint = <span class="hljs-string">&quot;google/mt5-small&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-pcld25">💡在 NLP 項目的早期階段,一個好的做法是在小樣本數據上訓練一類“小”模型。這使您可以更快地調試和迭代端到端工作流。一旦您對結果充滿信心,您始終可以通過簡單地更改模型檢查點來在大規模數據上訓練模型!</p></div> <p data-svelte-h="svelte-13rekop">讓我們在一個小例子上測試 mT5 標記器:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer(<span class="hljs-string">&quot;I loved reading the Hunger Games!&quot;</span>)
inputs<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [<span class="hljs-number">336</span>, <span class="hljs-number">259</span>, <span class="hljs-number">28387</span>, <span class="hljs-number">11807</span>, <span class="hljs-number">287</span>, <span class="hljs-number">62893</span>, <span class="hljs-number">295</span>, <span class="hljs-number">12507</span>, <span class="hljs-number">1</span>], <span class="hljs-string">&#x27;attention_mask&#x27;</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-15k723b">在這裡我們可以看到我們在<a href="/course/chapter3">第三章</a>第一次微調實驗中遇到的熟悉的 <strong>input_ids</strong><strong>attention_mask</strong> .讓我們用分詞器解碼這些輸入 ID ,可以<strong>convert_ids_to_tokens()</strong> 函數來查看我們正在處理什麼樣的標記器:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.convert_ids_to_tokens(inputs.input_ids)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[<span class="hljs-string">&#x27;▁I&#x27;</span>, <span class="hljs-string">&#x27;&#x27;</span>, <span class="hljs-string">&#x27;loved&#x27;</span>, <span class="hljs-string">&#x27;▁reading&#x27;</span>, <span class="hljs-string">&#x27;▁the&#x27;</span>, <span class="hljs-string">&#x27;▁Hung&#x27;</span>, <span class="hljs-string">&#x27;er&#x27;</span>, <span class="hljs-string">&#x27;▁Games&#x27;</span>, <span class="hljs-string">&#x27;&lt;/s&gt;&#x27;</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dmk4g6">特殊的 Unicode 字符 <code></code> 和序列結束標記 <code>&lt;/s&gt;</code> 表明我們正在處理 SentencePiece 分詞器,它基於在<a href="/course/chapter6">第六章</a>中討論的Unigram分詞算法. Unigram 對多語言語料庫特別有用,因為它允許 SentencePiece 不知道重音、標點符號以及許多語言(如日語)沒有空格字符。</p> <p data-svelte-h="svelte-14xuy3i">為了標記我們的語料庫,我們必須處理與摘要相關的細節:因為我們的標籤也是文本,它們可能會超過模型的最大上下文大小。這意味著我們需要對評論及其標題進行截斷,以確保我們不會將過長的輸入傳遞給我們的模型。 🤗 Transformers 中的分詞器提供了一個漂亮的 <strong>as_target_tokenizer()</strong> 函數,它允許您並行分詞並標記標籤的函數。這通常是使用預處理函數內的上下文管理器完成的,該函數首先對輸入進行編碼,然後將標籤編碼為單獨的列。以下是 mT5 的此函數的示例:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->max_input_length = <span class="hljs-number">512</span>
max_target_length = <span class="hljs-number">30</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
model_inputs = tokenizer(
examples[<span class="hljs-string">&quot;review_body&quot;</span>], max_length=max_input_length, truncation=<span class="hljs-literal">True</span>
)
<span class="hljs-comment"># Set up the tokenizer for targets</span>
<span class="hljs-keyword">with</span> tokenizer.as_target_tokenizer():
labels = tokenizer(
examples[<span class="hljs-string">&quot;review_title&quot;</span>], max_length=max_target_length, truncation=<span class="hljs-literal">True</span>
)
model_inputs[<span class="hljs-string">&quot;labels&quot;</span>] = labels[<span class="hljs-string">&quot;input_ids&quot;</span>]
<span class="hljs-keyword">return</span> model_inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1a5w1mm">讓我們通過這段代碼來了解發生了什麼。我們做的第一件事是定義值 <strong>max_input_length</strong><strong>max_target_length</strong> ,它為我們的評論和標題的長度設置了上限。由於評論正文通常比標題大得多,我們相應地調整了這些值。然後,在 <strong>preprocess_function()</strong> 我們可以看到評論首先被標記化,然後是標題在 <strong>as_target_tokenizer()</strong> 函數里也做了相同的處理.</p> <p data-svelte-h="svelte-g594qb">有了 <code>preprocess_function()</code>,我們在整個課程中廣泛使用的方便的 <code>Dataset.map()</code> 函數來標記整個語料庫是一件簡單的事情:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenized_datasets = books_dataset.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ya70qk">既然語料庫已經預處理完畢,我們來看看一些常用的摘要指標。正如我們將看到的,在衡量機器生成的文本的質量方面沒有靈丹妙藥。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-r2ca50">💡 你可能已經注意到我們在上面的 <code>Dataset.map()</code> 函數中使用了 <code>batched=True</code>。 這會以 1,000 個(默認)為單位對示例進行編碼,並允許您利用 🤗 Transformers 中快速標記器的多線程功能。 在可能的情況下,嘗試使用 <code>batched=True</code> 來加速您的預處理!</p></div> <h2 class="relative group"><a id="文本摘要的指標" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#文本摘要的指標"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>文本摘要的指標</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/TMshhnrEXlg" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-rmkt8r">與我們在本課程中涵蓋的大多數其他任務相比,衡量文本生成任務(如摘要或翻譯)的性能並不那麼簡單。例如,對於“我喜歡閱讀飢餓遊戲”這樣的評論,有多個有效摘要,例如“我喜歡飢餓遊戲”或“飢餓遊戲是一本好書”。顯然,在生成的摘要和標籤之間應用某種精確匹配並不是一個好的解決方案——即使是人類在這樣的指標下也會表現不佳,因為我們都有自己的寫作風格。</p> <p data-svelte-h="svelte-46mqaz">總而言之,最常用的指標之一是<a href="https://en.wikipedia.org/wiki/ROUGE_(metric)" rel="nofollow">ROUGE 分數</a>(Recall-Oriented Understudy for Gisting Evaluation 的縮寫)。該指標背後的基本思想是將生成的摘要與一組通常由人類創建的參考摘要進行比較。為了更精確,假設我們要比較以下兩個摘要:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->generated_summary = <span class="hljs-string">&quot;I absolutely loved reading the Hunger Games&quot;</span>
reference_summary = <span class="hljs-string">&quot;I loved reading the Hunger Games&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1j6b4e1">比較它們的一種方法是計算重疊單詞的數量,在這種情況下為 6。但是,這有點粗糙,因此 ROUGE 是基於計算計算重疊的 <em>precision</em><em>recall</em> 分數。。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-ia9gv8">🙋 如果這是您第一次聽說精確率和召回率,請不要擔心——我們將一起通過一些明確的示例來說明一切。 這些指標通常在分類任務中遇到,因此如果您想了解在該上下文中如何定義精確度和召回率,我們建議查看 scikit-learn [指南](<a href="https://scikit-learn.org/stable" rel="nofollow">https://scikit-learn.org/stable</a> /auto_examples/model_selection/plot_precision_recall.html)。</p></div> <p>對於 ROUGE,recall 衡量生成的參考摘要包含了多少參考摘要。如果我們只是比較單詞,recall可以根據以下公式計算:
<!-- HTML_TAG_START --><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mrow><mi mathvariant="normal">R</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">l</mi></mrow><mo>=</mo><mfrac><mrow><mi mathvariant="normal">N</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">v</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">g</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi></mrow><mrow><mi mathvariant="normal">T</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mtext></mtext><mi mathvariant="normal">n</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi><mtext></mtext><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mtext></mtext><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">f</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">e</mi><mtext></mtext><mi mathvariant="normal">s</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">y</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex"> \mathrm{Recall} = \frac{\mathrm{Number\,of\,overlapping\, words}}{\mathrm{Total\, number\, of\, words\, in\, reference\, summary}} </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord"><span class="mord mathrm">Recall</span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:2.2519em;vertical-align:-0.8804em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.3714em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Total</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">in</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">reference</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">summary</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">overlapping</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.8804em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span></span><!-- HTML_TAG_END --></p> <p>對於我們上面的簡單例子,這個公式給出了 6/6 = 1 的完美召回率;即,參考摘要中的所有單詞都已由模型生成。這聽起來可能很棒,但想象一下,如果我們生成的摘要是“我真的很喜歡整晚閱讀飢餓遊戲”。這也將有完美的recall,但可以說是一個更糟糕的總結,因為它很冗長。為了處理這些場景,我們還計算了pecision,它在 ROUGE 上下文中衡量生成的摘要中有多少是相關的:
<!-- HTML_TAG_START --><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mrow><mi mathvariant="normal">P</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">s</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">n</mi></mrow><mo>=</mo><mfrac><mrow><mi mathvariant="normal">N</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">v</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">g</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi></mrow><mrow><mi mathvariant="normal">T</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mtext></mtext><mi mathvariant="normal">n</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi><mtext></mtext><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mtext></mtext><mi mathvariant="normal">g</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">d</mi><mtext></mtext><mi mathvariant="normal">s</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">y</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex"> \mathrm{Precision} = \frac{\mathrm{Number\,of\,overlapping\, words}}{\mathrm{Total\, number\, of\, words\, in\, generated\, summary}} </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6833em;"></span><span class="mord"><span class="mord mathrm">Precision</span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:2.2519em;vertical-align:-0.8804em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.3714em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Total</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">in</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">generated</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">summary</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">overlapping</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span></span></span></span></span><span class="vlist-s"></span></span><span class="vlist-r"><span class="vlist" style="height:0.8804em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span></span><!-- HTML_TAG_END --></p> <p data-svelte-h="svelte-1y1wy0n">將此應用到我們的詳細摘要中會得到 6/10 = 0.6 的精度,這比我們較短的摘要獲得的 6/7 = 0.86 的精度要差得多。在實踐中,通常計算精度和召回率,然後報告 F1-score(精度和召回率的調和平均值)。我們可以在 🤗 Datasets 中通過安裝 <strong>rouge_score</strong> 包來計算他們:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install rouge_score<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1an8p6d">然後按如下方式加載 ROUGE 指標:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_metric
rouge_score = load_metric(<span class="hljs-string">&quot;rouge&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-w10v88">然後我們可以使用 <strong>rouge_score.compute()</strong> 一次性計算所有指標的函數:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->scores = rouge_score.compute(
predictions=[generated_summary], references=[reference_summary]
)
scores<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;rouge1&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)),
<span class="hljs-string">&#x27;rouge2&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>), mid=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>), high=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>)),
<span class="hljs-string">&#x27;rougeL&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)),
<span class="hljs-string">&#x27;rougeLsum&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>))}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14liyx0">哇,那個輸出中有很多信息——這都是什麼意思?首先,🤗 Datasets實際上計算了精度、召回率和 F1 分數的置信區間;這些是你可以在這裡看到的 <strong>low</strong> , <strong>mid</strong> , 和 <strong>high</strong> 屬性。此外,🤗 Datasets在比較生成摘要和參考摘要時,會根據不同類型的文本粒度計算各種 ROUGE 分數。這 <strong>rouge1</strong> 變體是一元組的重疊——這只是表達單詞重疊的一種奇特方式,這正是我們上面討論的度量標準。為了驗證這一點,讓我們輸出 <strong>mid</strong> 的數值:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->scores[<span class="hljs-string">&quot;rouge1&quot;</span>].mid<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ih9agl">太好了,準確率和召回率匹配了!那麼其他的 ROUGE 分數呢? <strong>rouge2</strong> 測量二元組之間的重疊(想想單詞對的重疊),而 <strong>rougeL</strong><strong>rougeLsum</strong> 通過在生成的和參考摘要中查找最長的公共子串來測量最長的單詞匹配序列。中的“總和” <strong>rougeLsum</strong> 指的是這個指標是在整個摘要上計算的,而 <strong>rougeL</strong> 計算為單個句子的平均值。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1czorp1">✏️ <strong>試試看!</strong> 創建您自己的生成和參考摘要示例,並查看生成的 ROUGE 分數是否與基於精確度和召回率公式的手動計算一致。 對於附加分,將文本拆分為二元組並比較“rouge2”指標的精度和召回率。</p></div> <p data-svelte-h="svelte-99pum5">我們將使用這些 ROUGE 分數來跟蹤我們模型的性能,但在此之前,讓我們做每個優秀的 NLP 從業者都應該做的事情:創建一個強大而簡單的baseline!</p> <h3 class="relative group"><a id="創建強大的baseline" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#創建強大的baseline"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>創建強大的baseline</span></h3> <p data-svelte-h="svelte-151fi9y">文本摘要的一個常見基線是簡單地取一篇文章的前三個句子,通常稱為 <em>lead-3</em> 基線。 我們可以使用句號(英文使用.)來跟蹤句子邊界,但這在”U.S.” or “U.N.”之類的首字母縮略詞上會失敗。所以我們將使用 <code>nltk</code> 庫,它包含一個更好的算法來處理這些情況。 您可以使用 <code>pip</code> 安裝軟件包,如下所示:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install nltk<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zy89wf">然後下載標點規則:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> nltk
nltk.download(<span class="hljs-string">&quot;punkt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-3nsd4c">接下來,我們從 <code>nltk</code> 導入句子標記器並創建一個簡單的函數來提取評論中的前三個句子。 文本摘要的約定是用換行符分隔每個摘要,因此我們也將其包含在內並在訓練示例上對其進行測試:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> nltk.tokenize <span class="hljs-keyword">import</span> sent_tokenize
<span class="hljs-keyword">def</span> <span class="hljs-title function_">three_sentence_summary</span>(<span class="hljs-params">text</span>):
<span class="hljs-keyword">return</span> <span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(text)[:<span class="hljs-number">3</span>])
<span class="hljs-built_in">print</span>(three_sentence_summary(books_dataset[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">1</span>][<span class="hljs-string">&quot;review_body&quot;</span>]))<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;I grew up reading Koontz, and years ago, I stopped,convinced i had &quot;outgrown&quot; him.&#x27;</span>
<span class="hljs-string">&#x27;Still,when a friend was looking for something suspenseful too read, I suggested Koontz.&#x27;</span>
<span class="hljs-string">&#x27;She found Strangers.&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ot0nfj">這似乎有效,所以讓我們現在實現一個函數,從數據集中提取這些“摘要”並計算baseline的 ROUGE 分數:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">evaluate_baseline</span>(<span class="hljs-params">dataset, metric</span>):
summaries = [three_sentence_summary(text) <span class="hljs-keyword">for</span> text <span class="hljs-keyword">in</span> dataset[<span class="hljs-string">&quot;review_body&quot;</span>]]
<span class="hljs-keyword">return</span> metric.compute(predictions=summaries, references=dataset[<span class="hljs-string">&quot;review_title&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-f11201">然後我們可以使用這個函數來計算驗證集上的 ROUGE 分數,並使用 Pandas 對它們進行一些美化:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd
score = evaluate_baseline(books_dataset[<span class="hljs-string">&quot;validation&quot;</span>], rouge_score)
rouge_names = [<span class="hljs-string">&quot;rouge1&quot;</span>, <span class="hljs-string">&quot;rouge2&quot;</span>, <span class="hljs-string">&quot;rougeL&quot;</span>, <span class="hljs-string">&quot;rougeLsum&quot;</span>]
rouge_dict = <span class="hljs-built_in">dict</span>((rn, <span class="hljs-built_in">round</span>(score[rn].mid.fmeasure * <span class="hljs-number">100</span>, <span class="hljs-number">2</span>)) <span class="hljs-keyword">for</span> rn <span class="hljs-keyword">in</span> rouge_names)
rouge_dict<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">16.74</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">8.83</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">15.6</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">15.96</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ur5vss">我們可以看到<code>rouge2</code>的分數明顯低於其他; 這可能反映了這樣一個事實,即評論標題通常很簡潔,因此lead-3 baseline過於冗長。 現在我們有了一個很好的基準,讓我們將注意力轉向微調 mT5!</p> <h2 class="relative group"><a id="使用-trainer-api微調mt5" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用-trainer-api微調mt5"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 Trainer API微調mT5</span></h2> <p data-svelte-h="svelte-1vu03ta">微調模型以進行提取摘要與我們在本章中介紹的其他任務非常相似。 我們需要做的第一件事是從<code>mt5-small</code>檢查點加載預訓練模型。 由於摘要提取是一個序列到序列的任務,我們可以使用 AutoModelForSeq2SeqLM 類加載模型,該類會自動下載並緩存權重:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-gwvjmk">💡 If you’re wondering why you don’t see any warnings about fine-tuning the model on a downstream task, that’s because for sequence-to-sequence tasks we keep all the weights of the network. Compare this to our text classification model in <a href="/course/chapter3">Chapter 3</a>, where the head of the pretrained model was replaced with a randomly initialized network.
💡 如果您想知道為什麼在下游任務中沒有看到任何關於微調模型的警告,那是因為對於序列到序列的任務,我們保留了網絡的所有權重。與我們在[第三章] (/course/chapter3)中的文本分類模型進行比較,文本分類模型預訓練模型的頭部被隨機初始化的網絡替換。</p></div> <p data-svelte-h="svelte-vzo7vx">我們需要做的下一件事是登錄 Hugging Face Hub。如果您在notebook中運行此代碼,則可以使用以下實用程序函數執行此操作:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
notebook_login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1l0n5g4">這將顯示一個小部件,您可以在其中輸入您的憑據。或者,您可以在終端中運行此命令並在那裡登錄:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->huggingface-<span class="hljs-keyword">cli</span> login<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-di08mr">我們需要生成摘要以便在訓練期間計算 ROUGE 分數。幸運的是,🤗 Transformers 提供了專用的 <code>Seq2SeqTrainingArguments</code><code>Seq2SeqTrainer</code> 類,可以自動為我們完成這項工作! 為了瞭解它是如何工作的,讓我們首先為我們的實驗定義超參數和其他參數:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Seq2SeqTrainingArguments
batch_size = <span class="hljs-number">8</span>
num_train_epochs = <span class="hljs-number">8</span>
<span class="hljs-comment"># Show the training loss with every epoch</span>
logging_steps = <span class="hljs-built_in">len</span>(tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>]) // batch_size
model_name = model_checkpoint.split(<span class="hljs-string">&quot;/&quot;</span>)[-<span class="hljs-number">1</span>]
args = Seq2SeqTrainingArguments(
output_dir=<span class="hljs-string">f&quot;<span class="hljs-subst">{model_name}</span>-finetuned-amazon-en-es&quot;</span>,
evaluation_strategy=<span class="hljs-string">&quot;epoch&quot;</span>,
learning_rate=<span class="hljs-number">5.6e-5</span>,
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
weight_decay=<span class="hljs-number">0.01</span>,
save_total_limit=<span class="hljs-number">3</span>,
num_train_epochs=num_train_epochs,
predict_with_generate=<span class="hljs-literal">True</span>,
logging_steps=logging_steps,
push_to_hub=<span class="hljs-literal">True</span>,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ajxonu">在這裡, <strong>predict_with_generate</strong> 參數已設置為True表明我們應該在評估期間生成摘要,以便我們可以計算每個時期的 ROUGE 分數。正如在<a href="/course/chapter1">第一章</a>所討論的,解碼器通過逐個預測令牌來執行推理,這是由模型的 <strong>generate()</strong> 方法實現的。設置 <strong>predict_with_generate=True</strong> 告訴 <strong>Seq2SeqTrainer</strong> 使用該方法進行評估。我們還調整了一些默認的超參數,例如學習率、epoch數和權重衰減,並且我們設置了 <strong>save_total_limit</strong> 訓練期間最多隻保存 3 個檢查點的選項——這是因為即使是 mT5 的“small”版本也使用大約 1 GB 的硬盤空間,我們可以通過限制我們保存的副本數量來節省一點空間。</p> <p data-svelte-h="svelte-1146555"><code>push_to_hub=True</code> 參數將允許我們在訓練後將模型推送到 Hub; 您將在<code>output_dir</code>定義的位置中的用戶配置文件下找到存儲庫。 請注意,您可以使用 <code>hub_model_id</code> 參數指定要推送到的存儲庫的名稱(特別是當您想要推送到組織時,您必須使用此參數)。 例如,當我們將模型推送到 <a href="https://huggingface.co/huggingface-course" rel="nofollow"><code>huggingface-course</code> 組織</a> 時,我們添加了<code>hub_model_id=&quot;huggingface-course/mt5-finetuned-amazon-en-es&quot;</code><code>Seq2SeqTrainingArguments</code></p> <p data-svelte-h="svelte-4ndhx8">我們需要做的下一件事是為訓練器提供一個“compute_metrics()”函數,以便我們可以在訓練期間評估我們的模型。 總結起來,這比簡單地在模型的預測上調用 <code>rouge_score.compute()</code> 更復雜一些,因為我們需要在計算 ROUGE 分數之前將輸出和標籤解碼為文本。 下面的函數正是這樣做的,並且還利用 <code>nltk</code> 中的 <code>sent_tokenize()</code> 函數來用換行符分隔摘要語句:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_metrics</span>(<span class="hljs-params">eval_pred</span>):
predictions, labels = eval_pred
<span class="hljs-comment"># Decode generated summaries into text</span>
decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Replace -100 in the labels as we can&#x27;t decode them</span>
labels = np.where(labels != -<span class="hljs-number">100</span>, labels, tokenizer.pad_token_id)
<span class="hljs-comment"># Decode reference summaries into text</span>
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># ROUGE expects a newline after each sentence</span>
decoded_preds = [<span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(pred.strip())) <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> decoded_preds]
decoded_labels = [<span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(label.strip())) <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> decoded_labels]
<span class="hljs-comment"># Compute ROUGE scores</span>
result = rouge_score.compute(
predictions=decoded_preds, references=decoded_labels, use_stemmer=<span class="hljs-literal">True</span>
)
<span class="hljs-comment"># Extract the median scores</span>
result = {key: value.mid.fmeasure * <span class="hljs-number">100</span> <span class="hljs-keyword">for</span> key, value <span class="hljs-keyword">in</span> result.items()}
<span class="hljs-keyword">return</span> {k: <span class="hljs-built_in">round</span>(v, <span class="hljs-number">4</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> result.items()}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-hb45we">接下來,我們需要為我們的序列到序列任務定義一個數據整理器。由於 mT5 是一個編碼器-解碼器 Transformer 模型,準備我們的批次的一個微妙之處是,在解碼過程中,我們需要將標籤向右移動一個。 這是為了確保解碼器只看到之前的真實的標籤,而不是當前或未來的標籤,這對於模型來說很容易記憶。 這類似於在 <a href="/course/chapter7/6">因果語言建模</a> 等任務中如何將掩蔽的自我注意應用於輸入。</p> <p data-svelte-h="svelte-1exrf4x">幸運的是,🤗 Transformers 提供了一個 <code>DataCollatorForSeq2Seq</code> 整理器,它將為我們動態填充輸入和標籤。 要實例化這個收集器,我們只需要提供 <code>tokenizer</code><code>model</code></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForSeq2Seq
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vpbjmc">讓我們看看這個整理器在輸入一小批示例時會產生什麼。 首先,我們需要刪除帶有字符串的列,因為整理器不知道如何填充這些元素:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenized_datasets = tokenized_datasets.remove_columns(
books_dataset[<span class="hljs-string">&quot;train&quot;</span>].column_names
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cjqhvx">由於 collator 需要一個 <code>dict</code> 的列表,其中每個 <code>dict</code> 代表數據集中的一個示例,我們還需要在將數據傳遞給 data collator 之前將數據整理成預期的格式:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->features = [tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>][i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">2</span>)]
data_collator(features)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;attention_mask&#x27;</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>,
<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>,
<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;input_ids&#x27;</span>: tensor([[ <span class="hljs-number">1494</span>, <span class="hljs-number">259</span>, <span class="hljs-number">8622</span>, <span class="hljs-number">390</span>, <span class="hljs-number">259</span>, <span class="hljs-number">262</span>, <span class="hljs-number">2316</span>, <span class="hljs-number">3435</span>, <span class="hljs-number">955</span>,
<span class="hljs-number">772</span>, <span class="hljs-number">281</span>, <span class="hljs-number">772</span>, <span class="hljs-number">1617</span>, <span class="hljs-number">263</span>, <span class="hljs-number">305</span>, <span class="hljs-number">14701</span>, <span class="hljs-number">260</span>, <span class="hljs-number">1385</span>,
<span class="hljs-number">3031</span>, <span class="hljs-number">259</span>, <span class="hljs-number">24146</span>, <span class="hljs-number">332</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">259</span>, <span class="hljs-number">43906</span>, <span class="hljs-number">305</span>, <span class="hljs-number">336</span>,
<span class="hljs-number">260</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[ <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>, <span class="hljs-number">260</span>, <span class="hljs-number">112240</span>, <span class="hljs-number">15192</span>, <span class="hljs-number">305</span>,
<span class="hljs-number">53198</span>, <span class="hljs-number">276</span>, <span class="hljs-number">259</span>, <span class="hljs-number">74060</span>, <span class="hljs-number">263</span>, <span class="hljs-number">260</span>, <span class="hljs-number">459</span>, <span class="hljs-number">25640</span>, <span class="hljs-number">776</span>,
<span class="hljs-number">2119</span>, <span class="hljs-number">336</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2220</span>, <span class="hljs-number">259</span>, <span class="hljs-number">18896</span>, <span class="hljs-number">288</span>, <span class="hljs-number">4906</span>, <span class="hljs-number">288</span>,
<span class="hljs-number">1037</span>, <span class="hljs-number">3931</span>, <span class="hljs-number">260</span>, <span class="hljs-number">7083</span>, <span class="hljs-number">101476</span>, <span class="hljs-number">1143</span>, <span class="hljs-number">260</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;labels&#x27;</span>: tensor([[ <span class="hljs-number">7483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2364</span>, <span class="hljs-number">15695</span>, <span class="hljs-number">1</span>, -<span class="hljs-number">100</span>],
[ <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;decoder_input_ids&#x27;</span>: tensor([[ <span class="hljs-number">0</span>, <span class="hljs-number">7483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2364</span>, <span class="hljs-number">15695</span>, <span class="hljs-number">1</span>],
[ <span class="hljs-number">0</span>, <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>]])}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-p2m34e">這裡要注意的主要是第一個例子比第二個例子要長,所以第二個例子的 <code>input_ids</code><code>attention_mask</code> 已經在右側填充了一個 <code>[PAD]</code> 標記(其 ID 是 <code>0</code>)。 類似地,我們可以看到 <code>labels</code> 已用 <code>-100</code> 填充,以確保填充標記被損失函數忽略。 最後,我們可以看到一個新的 <code>decoder_input_ids</code>,它通過在第一個條目中插入 <code>[PAD]</code> 標記將標籤向右移動。</p> <p data-svelte-h="svelte-1evx2ii">我們終於擁有了訓練所需的所有的前期準備!我們現在只需要使用標準參數實例化訓練器:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Seq2SeqTrainer
trainer = Seq2SeqTrainer(
model,
args,
train_dataset=tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>],
eval_dataset=tokenized_datasets[<span class="hljs-string">&quot;validation&quot;</span>],
data_collator=data_collator,
tokenizer=tokenizer,
compute_metrics=compute_metrics,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1xae0e3">並啟動我們的訓練:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19bot0z">在訓練期間,您應該會看到訓練損失減少並且 ROUGE 分數隨著每個 epoch 增加。訓練完成後,您可以通過運行<strong>Trainer.evaluate()</strong> 查看最終的 ROUGE 分數 :</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.evaluate()<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;eval_loss&#x27;</span>: <span class="hljs-number">3.028524398803711</span>,
<span class="hljs-string">&#x27;eval_rouge1&#x27;</span>: <span class="hljs-number">16.9728</span>,
<span class="hljs-string">&#x27;eval_rouge2&#x27;</span>: <span class="hljs-number">8.2969</span>,
<span class="hljs-string">&#x27;eval_rougeL&#x27;</span>: <span class="hljs-number">16.8366</span>,
<span class="hljs-string">&#x27;eval_rougeLsum&#x27;</span>: <span class="hljs-number">16.851</span>,
<span class="hljs-string">&#x27;eval_gen_len&#x27;</span>: <span class="hljs-number">10.1597</span>,
<span class="hljs-string">&#x27;eval_runtime&#x27;</span>: <span class="hljs-number">6.1054</span>,
<span class="hljs-string">&#x27;eval_samples_per_second&#x27;</span>: <span class="hljs-number">38.982</span>,
<span class="hljs-string">&#x27;eval_steps_per_second&#x27;</span>: <span class="hljs-number">4.914</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-186a160">從分數中我們可以看到,我們的模型輕鬆超過了我們的lead-3 baseline——很好!最後要做的是將模型權重推送到 Hub,如下所示:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.push_to_hub(<span class="hljs-attribute">commit_message</span>=<span class="hljs-string">&quot;Training complete&quot;</span>, <span class="hljs-attribute">tags</span>=<span class="hljs-string">&quot;summarization&quot;</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;https://huggingface.co/huggingface-course/mt5-finetuned-amazon-en-es/commit/aa0536b829b28e73e1e4b94b8a5aacec420d40e0&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-v9ezef">這會將檢查點和配置文件保存到 <strong>output_dir</strong> , 在將所有文件上傳到模型中心之前。通過指定 <strong>tags</strong> 參數,我們還確保模型中心上的小部件將是一個用於彙總管道的小部件,而不是與 mT5 架構關聯的默認文本生成小部件(有關模型標籤的更多信息,請參閱<a href="https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined" rel="nofollow">🤗 Hub 文檔</a>)。輸出來自 <strong>trainer.push_to_hub()</strong> 是 Git 提交哈希的 URL,因此您可以輕鬆查看對模型存儲庫所做的更改!</p> <p data-svelte-h="svelte-1n5yakw">在結束本節之前,讓我們看一下如何使用 🤗 Accelerate 提供的底層API對 mT5 進行微調。</p> <h2 class="relative group"><a id="使用--accelerate-微調-mt5" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用--accelerate-微調-mt5"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用 🤗 Accelerate 微調 mT5</span></h2> <p data-svelte-h="svelte-34tute">使用 🤗 Accelerate 微調我們的模型與我們在 <a href="/course/chapter3">Chapter 3</a> 中遇到的文本分類示例非常相似。 主要區別在於需要在訓練期間顯式生成摘要並定義我們如何計算 ROUGE 分數(回想一下,<code>Seq2SeqTrainer</code> 為我們生成了摘要)。 讓我們看看我們如何在 🤗 Accelerate 中實現這兩個要求!</p> <h3 class="relative group"><a id="為訓練做好一切準備" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#為訓練做好一切準備"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>為訓練做好一切準備</span></h3> <p data-svelte-h="svelte-vaymut">The first thing we need to do is create a <code>DataLoader</code> for each of our splits. Since the PyTorch dataloaders expect batches of tensors, we need to set the format to <code>&quot;torch&quot;</code> in our datasets:
我們需要做的第一件事是為每個數據集的每一個拆分創建一個<code>DataLoader</code>。 由於 PyTorch 數據加載器需要成批的張量,我們需要在數據集中將格式設置為<code>torch</code></p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenized_datasets.set_format(<span class="hljs-string">&quot;torch&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-j2upv7">現在我們已經有了僅由張量組成的數據集,接下來要做的是再次實例化<code>DataCollatorForSeq2Seq</code>。 為此,我們需要提供模型微調前的版本,所以讓我們從緩存中再次加載它:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13d4wbh">然後我們可以實例化數據整理器並使用它來定義我們的數據加載器:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader
batch_size = <span class="hljs-number">8</span>
train_dataloader = DataLoader(
tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>],
shuffle=<span class="hljs-literal">True</span>,
collate_fn=data_collator,
batch_size=batch_size,
)
eval_dataloader = DataLoader(
tokenized_datasets[<span class="hljs-string">&quot;validation&quot;</span>], collate_fn=data_collator, batch_size=batch_size
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17ustuc">接下來要做的是定義我們想要使用的優化器。與我們的其他示例一樣,我們將使用 <strong>AdamW</strong> ,這適用於大多數問題:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW
optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">2e-5</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19isevy">最後,我們將模型、優化器和數據加載器提供給 <strong>accelerator.prepare()</strong> 方法:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
accelerator = Accelerator()
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
model, optimizer, train_dataloader, eval_dataloader
)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1ovzgdz">🚨如果您在 TPU 上進行訓練,則需要將上述所有代碼移動到專門的訓練函數中。有關詳細信息,請參閱<a href="/course/chapter3">第三章</a></p></div> <p data-svelte-h="svelte-vtsua8">現在我們已經準備好了我們索要用的對象,還有三件事要做:</p> <ul data-svelte-h="svelte-1emhrdw"><li>定義學習率調度計劃。</li> <li>實現一個功能來對摘要進行後續處理以進行評估。</li> <li>在 Hub 上創建一個存儲庫,我們可以將模型推送到該存儲庫。</li></ul> <p data-svelte-h="svelte-6h01mk">對於學習率調度,我們將使用前幾節中的標準線性衰減:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> get_scheduler
num_train_epochs = <span class="hljs-number">10</span>
num_update_steps_per_epoch = <span class="hljs-built_in">len</span>(train_dataloader)
num_training_steps = num_train_epochs * num_update_steps_per_epoch
lr_scheduler = get_scheduler(
<span class="hljs-string">&quot;linear&quot;</span>,
optimizer=optimizer,
num_warmup_steps=<span class="hljs-number">0</span>,
num_training_steps=num_training_steps,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ybzp24">對於後續處理,我們需要一個函數,將生成的摘要拆分為由換行符分隔的句子。 這是 ROUGE 指標所期望的格式,我們可以使用以下代碼片段來實現:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">postprocess_text</span>(<span class="hljs-params">preds, labels</span>):
preds = [pred.strip() <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
labels = [label.strip() <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> labels]
<span class="hljs-comment"># ROUGE expects a newline after each sentence</span>
preds = [<span class="hljs-string">&quot;\n&quot;</span>.join(nltk.sent_tokenize(pred)) <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
labels = [<span class="hljs-string">&quot;\n&quot;</span>.join(nltk.sent_tokenize(label)) <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> labels]
<span class="hljs-keyword">return</span> preds, labels<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-u939lb">如果你還記得我們是如何定義 <code>Seq2SeqTrainer</code><code>compute_metrics()</code> 函數的,這對你來說應該很熟悉。</p> <p data-svelte-h="svelte-18hbt2a">最後,我們需要在 Hugging Face Hub 上創建一個模型存儲庫。 為此,我們可以使用🤗 Hub 庫的get_full_repo_name。 我們只需要為我們的存儲庫定義一個名稱,該庫有一個非常好用的函數可以將存儲庫 ID 與用戶配置文件結合起來:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_full_repo_name
model_name = <span class="hljs-string">&quot;test-bert-finetuned-squad-accelerate&quot;</span>
repo_name = get_full_repo_name(model_name)
repo_name<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;lewtun/mt5-finetuned-amazon-en-es-accelerate&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-97puqp">現在我們可以使用這個存儲庫名稱將本地版本克隆到我們的結果目錄中,該目錄將存儲訓練的模型:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> Repository
output_dir = <span class="hljs-string">&quot;results-mt5-finetuned-squad-accelerate&quot;</span>
repo = Repository(output_dir, clone_from=repo_name)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4m9afo">這將允許我們在訓練期間通過調用 <code>repo.push_to_hub()</code> 方法將模型推送到 Hub! 現在讓我們通過寫出完整的訓練循環來結束我們的分析。</p> <h3 class="relative group"><a id="訓練循環" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#訓練循環"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>訓練循環</span></h3> <p data-svelte-h="svelte-1mstidq">文本摘要的訓練循環與我們遇到的其他 🤗 Accelerate 示例非常相似,大致分為四個主要步驟:這</p> <ol data-svelte-h="svelte-bno592"><li>通過在每個epoch 迭代 <code>train_dataloader</code> 中的所有示例來訓練模型。</li> <li>在每個 epoch 結束時生成模型摘要,首先生成標記,然後將它們(和參考摘要)解碼為文本。</li> <li>使用我們之前看到的相同技術計算 ROUGE 分數。</li> <li>保存檢查點並將所有內容推送到 Hub。 在這裡,我們依賴 <code>Repository</code> 對象的巧妙的 <code>blocking=False</code> 參數,以便我們可以在每個 epoch 異步地上傳檢查點。 這使我們能夠繼續訓練,而不必等待與 GB 大小的模型慢呼呼的上傳!</li></ol> <p data-svelte-h="svelte-127wmdb">這些步驟可以在以下代碼塊中看到:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps))
<span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_train_epochs):
<span class="hljs-comment"># Training</span>
model.train()
<span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(train_dataloader):
outputs = model(**batch)
loss = outputs.loss
accelerator.backward(loss)
optimizer.step()
lr_scheduler.step()
optimizer.zero_grad()
progress_bar.update(<span class="hljs-number">1</span>)
<span class="hljs-comment"># Evaluation</span>
model.<span class="hljs-built_in">eval</span>()
<span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(eval_dataloader):
<span class="hljs-keyword">with</span> torch.no_grad():
generated_tokens = accelerator.unwrap_model(model).generate(
batch[<span class="hljs-string">&quot;input_ids&quot;</span>],
attention_mask=batch[<span class="hljs-string">&quot;attention_mask&quot;</span>],
)
generated_tokens = accelerator.pad_across_processes(
generated_tokens, dim=<span class="hljs-number">1</span>, pad_index=tokenizer.pad_token_id
)
labels = batch[<span class="hljs-string">&quot;labels&quot;</span>]
<span class="hljs-comment"># If we did not pad to max length, we need to pad the labels too</span>
labels = accelerator.pad_across_processes(
batch[<span class="hljs-string">&quot;labels&quot;</span>], dim=<span class="hljs-number">1</span>, pad_index=tokenizer.pad_token_id
)
generated_tokens = accelerator.gather(generated_tokens).cpu().numpy()
labels = accelerator.gather(labels).cpu().numpy()
<span class="hljs-comment"># Replace -100 in the labels as we can&#x27;t decode them</span>
labels = np.where(labels != -<span class="hljs-number">100</span>, labels, tokenizer.pad_token_id)
<span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(generated_tokens, <span class="hljs-built_in">tuple</span>):
generated_tokens = generated_tokens[<span class="hljs-number">0</span>]
decoded_preds = tokenizer.batch_decode(
generated_tokens, skip_special_tokens=<span class="hljs-literal">True</span>
)
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=<span class="hljs-literal">True</span>)
decoded_preds, decoded_labels = postprocess_text(
decoded_preds, decoded_labels
)
rouge_score.add_batch(predictions=decoded_preds, references=decoded_labels)
<span class="hljs-comment"># Compute metrics</span>
result = rouge_score.compute()
<span class="hljs-comment"># Extract the median ROUGE scores</span>
result = {key: value.mid.fmeasure * <span class="hljs-number">100</span> <span class="hljs-keyword">for</span> key, value <span class="hljs-keyword">in</span> result.items()}
result = {k: <span class="hljs-built_in">round</span>(v, <span class="hljs-number">4</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> result.items()}
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Epoch <span class="hljs-subst">{epoch}</span>:&quot;</span>, result)
<span class="hljs-comment"># Save and upload</span>
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
<span class="hljs-keyword">if</span> accelerator.is_main_process:
tokenizer.save_pretrained(output_dir)
repo.push_to_hub(
commit_message=<span class="hljs-string">f&quot;Training in progress epoch <span class="hljs-subst">{epoch}</span>&quot;</span>, blocking=<span class="hljs-literal">False</span>
)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Epoch <span class="hljs-number">0</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">5.6351</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">1.1625</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">5.4866</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">5.5005</span>}
Epoch <span class="hljs-number">1</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">9.8646</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">3.4106</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">9.9439</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">9.9306</span>}
Epoch <span class="hljs-number">2</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">11.0872</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">3.3273</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">11.0508</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">10.9468</span>}
Epoch <span class="hljs-number">3</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">11.8587</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">4.8167</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">11.7986</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">11.7518</span>}
Epoch <span class="hljs-number">4</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">12.9842</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">5.5887</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">12.7546</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">12.7029</span>}
Epoch <span class="hljs-number">5</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.4628</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.4598</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.312</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.2913</span>}
Epoch <span class="hljs-number">6</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">12.9131</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">5.8914</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">12.6896</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">12.5701</span>}
Epoch <span class="hljs-number">7</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.3079</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.2994</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.1536</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.1194</span>}
Epoch <span class="hljs-number">8</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.96</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.5998</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.9123</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.7744</span>}
Epoch <span class="hljs-number">9</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">14.1192</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">7.0059</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">14.1172</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.9509</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1okcb59">就是這樣! 運行此程序後,您將獲得與我們使用“Trainer”獲得的模型和結果非常相似的模型和結果。</p> <h2 class="relative group"><a id="使用您微調的模型" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#使用您微調的模型"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>使用您微調的模型</span></h2> <p data-svelte-h="svelte-1gond6a">將模型推送到 Hub 後,您可以通過推理小部件或“管道”對象來使用它,如下所示:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
hub_model_id = <span class="hljs-string">&quot;huggingface-course/mt5-small-finetuned-amazon-en-es&quot;</span>
summarizer = pipeline(<span class="hljs-string">&quot;summarization&quot;</span>, model=hub_model_id)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1po5f75">我們可以將測試集中的一些示例(模型還沒有看到)提供給我們的管道,以瞭解生成摘要的質量。 首先讓我們實現一個簡單的函數來一起顯示評論、標題和生成的摘要:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">print_summary</span>(<span class="hljs-params">idx</span>):
review = books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_body&quot;</span>]
title = books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_title&quot;</span>]
summary = summarizer(books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_body&quot;</span>])[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;summary_text&quot;</span>]
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&#x27;&gt;&gt;&gt; Review: <span class="hljs-subst">{review}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt;&gt; Title: <span class="hljs-subst">{title}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt;&gt; Summary: <span class="hljs-subst">{summary}</span>&#x27;&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1tt494o">讓我們看一下我們得到的一個英文例子:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->print_summary(<span class="hljs-number">100</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt;&gt; Review: Nothing special at all about this product... the book is too small and stiff and hard to write in. The huge sticker on the back doesn’t come off and looks super tacky. I would not purchase this again. I could have just bought a journal from the dollar store and it would be basically the same thing. It’s also really expensive for what it is.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Title: Not impressed at all... buy something else&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Summary: Nothing special at all about this product&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-h36ly9">這還不錯! 我們可以看到,我們的模型實際上已經能夠通過增加部分新詞來執行抽象摘要。 也許我們模型最酷的方面是它是雙語的,所以我們還可以生成西班牙語評論的摘要:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->print_summary(<span class="hljs-number">0</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt;&gt; Review: Es una trilogia que se hace muy facil de leer. Me ha gustado, no me esperaba el final para nada&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Title: Buena literatura para adolescentes&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Summary: Muy facil de leer&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1axhtkv">摘要翻譯成了英文的“非常容易閱讀”,在這種情況下,我們可以看到它是直接從評論中提取的。 這顯示了 mT5 模型的多功能性,並讓您體驗了處理多語言語料庫的感覺!</p> <p data-svelte-h="svelte-ybbmy9">接下來,我們將把注意力轉向稍微複雜的任務:從頭開始訓練語言模型。</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/zh-TW/chapter7/5.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_14z83n9 = {
assets: "/docs/course/pr_1069/zh-TW",
base: "/docs/course/pr_1069/zh-TW",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/course/pr_1069/zh-TW/_app/immutable/entry/start.01945326.js"),
import("/docs/course/pr_1069/zh-TW/_app/immutable/entry/app.e8597ff2.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 58],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
199 kB
·
Xet hash:
7c724f2d47096e6259540a0df875f39a939222854645e850540ed7a0c0a1619d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.