Buckets:

rtrm's picture
download
raw
210 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;要約&quot;,&quot;local&quot;:&quot;要約&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;多言語コーパスの準備&quot;,&quot;local&quot;:&quot;多言語コーパスの準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文章要約用モデル&quot;,&quot;local&quot;:&quot;文章要約用モデル&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;データの前処理&quot;,&quot;local&quot;:&quot;データの前処理&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文章要約のための指標&quot;,&quot;local&quot;:&quot;文章要約のための指標&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;強力なベースラインの作成&quot;,&quot;local&quot;:&quot;強力なベースラインの作成&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Trainer API を使って mT5 を微調整する&quot;,&quot;local&quot;:&quot;trainer-api-を使って-mt5-を微調整する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;KerasでmT5を微調整する&quot;,&quot;local&quot;:&quot;kerasでmt5を微調整する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;mT5モデルを 🤗 Accelerate を使って微調整する&quot;,&quot;local&quot;:&quot;mt5モデルを--accelerate-を使って微調整する&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;トレーニングのための準備&quot;,&quot;local&quot;:&quot;トレーニングのための準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;学習ループ&quot;,&quot;local&quot;:&quot;学習ループ&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;あなたの微調整したモデルを使用する&quot;,&quot;local&quot;:&quot;あなたの微調整したモデルを使用する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/course/pr_1069/ja/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/entry/start.e837503d.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/scheduler.37c15a92.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/singletons.29b81cfd.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/index.18351ede.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/paths.a1ea15c2.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/entry/app.2ea0d654.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/index.7cb9c9b8.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/nodes/0.dbe7c785.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/nodes/28.28af55db.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/Tip.d10b3fc9.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/Youtube.8666c400.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/CodeBlock.abae2786.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/CourseFloatingBanner.df82c153.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/FrameworkSwitchCourse.97630871.js">
<link rel="modulepreload" href="/docs/course/pr_1069/ja/_app/immutable/chunks/getInferenceSnippets.f9350a3f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;要約&quot;,&quot;local&quot;:&quot;要約&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;多言語コーパスの準備&quot;,&quot;local&quot;:&quot;多言語コーパスの準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文章要約用モデル&quot;,&quot;local&quot;:&quot;文章要約用モデル&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;データの前処理&quot;,&quot;local&quot;:&quot;データの前処理&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;文章要約のための指標&quot;,&quot;local&quot;:&quot;文章要約のための指標&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;強力なベースラインの作成&quot;,&quot;local&quot;:&quot;強力なベースラインの作成&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;Trainer API を使って mT5 を微調整する&quot;,&quot;local&quot;:&quot;trainer-api-を使って-mt5-を微調整する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;KerasでmT5を微調整する&quot;,&quot;local&quot;:&quot;kerasでmt5を微調整する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;mT5モデルを 🤗 Accelerate を使って微調整する&quot;,&quot;local&quot;:&quot;mt5モデルを--accelerate-を使って微調整する&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;トレーニングのための準備&quot;,&quot;local&quot;:&quot;トレーニングのための準備&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;学習ループ&quot;,&quot;local&quot;:&quot;学習ループ&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;あなたの微調整したモデルを使用する&quot;,&quot;local&quot;:&quot;あなたの微調整したモデルを使用する&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="bg-white leading-none border border-gray-100 rounded-lg flex p-0.5 w-56 text-sm mb-4"><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none 
!no-underline rounded-l bg-red-50 dark:bg-transparent text-red-600" href="?fw=pt"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><defs><clipPath id="a"><rect x="3.05" y="0.5" width="25.73" height="31" fill="none"></rect></clipPath></defs><g clip-path="url(#a)"><path d="M24.94,9.51a12.81,12.81,0,0,1,0,18.16,12.68,12.68,0,0,1-18,0,12.81,12.81,0,0,1,0-18.16l9-9V5l-.84.83-6,6a9.58,9.58,0,1,0,13.55,0ZM20.44,9a1.68,1.68,0,1,1,1.67-1.67A1.68,1.68,0,0,1,20.44,9Z" fill="#ee4c2c"></path></g></svg> Pytorch </a><a class="flex justify-center flex-1 py-1.5 px-2.5 focus:outline-none !no-underline rounded-r text-gray-500 filter grayscale" href="?fw=tf"><svg class="mr-1.5" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" focusable="false" role="img" width="0.94em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 274"><path d="M145.726 42.065v42.07l72.861 42.07v-42.07l-72.86-42.07zM0 84.135v42.07l36.43 21.03V105.17L0 84.135zm109.291 21.035l-36.43 21.034v126.2l36.43 21.035v-84.135l36.435 21.035v-42.07l-36.435-21.034V105.17z" fill="#E55B2D"></path><path d="M145.726 42.065L36.43 105.17v42.065l72.861-42.065v42.065l36.435-21.03v-84.14zM255.022 63.1l-36.435 21.035v42.07l36.435-21.035V63.1zm-72.865 84.135l-36.43 21.035v42.07l36.43-21.036v-42.07zm-36.43 63.104l-36.436-21.035v84.135l36.435-21.035V210.34z" fill="#ED8E24"></path><path d="M145.726 0L0 84.135l36.43 21.035l109.296-63.105l72.861 42.07L255.022 63.1L145.726 0zm0 126.204l-36.435 21.03l36.435 21.036l36.43-21.035l-36.43-21.03z" fill="#F8BF3C"></path></svg> TensorFlow </a></div> <h1 class="relative group"><a id="要約" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" 
href="#要約"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>要約</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"><a href="https://discuss.huggingface.co/t/chapter-7-questions" target="_blank"><img alt="Ask a Question" class="!m-0" src="https://img.shields.io/badge/Ask%20a%20question-ffcb4c.svg?logo=data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHZpZXdCb3g9IjAgLTEgMTA0IDEwNiI+PGRlZnM+PHN0eWxlPi5jbHMtMXtmaWxsOiMyMzFmMjA7fS5jbHMtMntmaWxsOiNmZmY5YWU7fS5jbHMtM3tmaWxsOiMwMGFlZWY7fS5jbHMtNHtmaWxsOiMwMGE5NGY7fS5jbHMtNXtmaWxsOiNmMTVkMjI7fS5jbHMtNntmaWxsOiNlMzFiMjM7fTwvc3R5bGU+PC9kZWZzPjx0aXRsZT5EaXNjb3Vyc2VfbG9nbzwvdGl0bGU+PGcgaWQ9IkxheWVyXzIiPjxnIGlkPSJMYXllcl8zIj48cGF0aCBjbGFzcz0iY2xzLTEiIGQ9Ik01MS44NywwQzIzLjcxLDAsMCwyMi44MywwLDUxYzAsLjkxLDAsNTIuODEsMCw1Mi44MWw1MS44Ni0uMDVjMjguMTYsMCw1MS0yMy43MSw1MS01MS44N1M4MCwwLDUxLjg3LDBaIi8+PHBhdGggY2xhc3M9ImNscy0yIiBkPSJNNTIuMzcsMTkuNzRBMzEuNjIsMzEuNjIsMCwwLDAsMjQuNTgsNjYuNDFsLTUuNzIsMTguNEwzOS40LDgwLjE3YTMxLjYxLDMxLjYxLDAsMSwwLDEzLTYwLjQzWiIvPjxwYXRoIGNsYXNzPSJjbHMtMyIgZD0iTTc3LjQ1LDMyLjEyYTMxLjYsMzEuNiwwLDAsMS0zOC4wNSw0OEwxOC44Niw4NC44MmwyMC45MS0yLjQ3QTMxLjYsMzEuNiwwLDAsMCw3Ny40NSwzMi4xMloiLz48cGF0aCBjbGFzcz0iY2xzLTQiIGQ9Ik03MS42MywyNi4yOUEzMS42LDMxLjYsMCwwLDEsMzguOCw3OEw
xOC44Niw4NC44MiwzOS40LDgwLjE3QTMxLjYsMzEuNiwwLDAsMCw3MS42MywyNi4yOVoiLz48cGF0aCBjbGFzcz0iY2xzLTUiIGQ9Ik0yNi40Nyw2Ny4xMWEzMS42MSwzMS42MSwwLDAsMSw1MS0zNUEzMS42MSwzMS42MSwwLDAsMCwyNC41OCw2Ni40MWwtNS43MiwxOC40WiIvPjxwYXRoIGNsYXNzPSJjbHMtNiIgZD0iTTI0LjU4LDY2LjQxQTMxLjYxLDMxLjYxLDAsMCwxLDcxLjYzLDI2LjI5YTMxLjYxLDMxLjYxLDAsMCwwLTQ5LDM5LjYzbC0zLjc2LDE4LjlaIi8+PC9nPjwvZz48L3N2Zz4="></a> <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/master/course/ja/chapter7/section5_pt.ipynb" target="_blank"><img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a href="https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/master/course/ja/chapter7/section5_pt.ipynb" target="_blank"><img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"></a></div> <p data-svelte-h="svelte-1i2hjpd">このセクションでは、Transformerモデルを使用して、長いドキュメントを要約する方法を見ていきます。これは、<em>文章要約</em> として知られるタスクです。 これは、長い文章を理解したり、ドキュメントの主要なトピックを捕捉する一貫性のあるテキストを生成したりするなど、さまざまな能力を必要とするため、最も困難なNLPタスクの1つです。 ただし、テキストの要約は、うまく行けば、領域の専門家が長いドキュメントを詳細に読む負担を軽減することで、さまざまなビジネスプロセスをスピードアップできる強力なツールになります。</p> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/yHnr5Dk2zCI" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-69wbpw"><a href="https://huggingface.co/models?pipeline_tag=summarization&sort=downloads" rel="nofollow">ハギングフェイス ハブ</a>には、要約用に微調整されたさまざまなモデルがすでに存在しますが、これらのほとんどは英語のドキュメントにのみ適しています。 したがって、このセクションにひねりを加えるために、英語とスペイン語のバイリンガルモデルをトレーニングします。 このセクションの終わりまでに、ここに示すようなカスタマーレビューを要約できる<a href="https://huggingface.co/huggingface-course/mt5-small-finetuned-amazon-en-es" rel="nofollow">モデル</a>ができあがります。</p> <iframe src="https://course-demos-mt5-small-finetuned-amazon-en-es.hf.space" frameborder="0" height="400" title="Gradio app" class="block 
dark:hidden container p-0 flex-grow space-iframe" allow="accelerometer; ambient-light-sensor; autoplay; battery; camera; document-domain; encrypted-media; fullscreen; geolocation; gyroscope; layout-animations; legacy-image-formats; magnetometer; microphone; midi; oversized-images; payment; picture-in-picture; publickey-credentials-get; sync-xhr; usb; vr ; wake-lock; xr-spatial-tracking" sandbox="allow-forms allow-modals allow-popups allow-popups-to-escape-sandbox allow-same-origin allow-scripts allow-downloads"></iframe> <p data-svelte-h="svelte-1dbid4x">これから説明するように、これらの要約は、顧客が製品レビュー投稿時につけたタイトル文を使って学習されているため、簡潔です。 このタスクに適した多言語コーパスをまとめることから始めましょう。</p> <h2 class="relative group"><a id="多言語コーパスの準備" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#多言語コーパスの準備"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>多言語コーパスの準備</span></h2> <p data-svelte-h="svelte-1lfcs84"><a href="https://huggingface.co/datasets/amazon_reviews_multi" rel="nofollow">Multilingual Amazon Reviews Corpus</a>を使用して、多言語要約器を作成します。このコーパスは、6つの言語でのAmazon製品レビューで構成されており、通常、多言語分類子のベンチマークに使用されます。 ただし、各レビューには短いタイトルが付いているため、モデルが学習対象とする要約文としてタイトルを使用できます。 開始するには、ハギングフェイス 
ハブから英語とスペイン語のサブセットをダウンロードしましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
spanish_dataset = load_dataset(<span class="hljs-string">&quot;amazon_reviews_multi&quot;</span>, <span class="hljs-string">&quot;es&quot;</span>)
english_dataset = load_dataset(<span class="hljs-string">&quot;amazon_reviews_multi&quot;</span>, <span class="hljs-string">&quot;en&quot;</span>)
english_dataset<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">200000</span>
})
validation: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">5000</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;review_id&#x27;</span>, <span class="hljs-string">&#x27;product_id&#x27;</span>, <span class="hljs-string">&#x27;reviewer_id&#x27;</span>, <span class="hljs-string">&#x27;stars&#x27;</span>, <span class="hljs-string">&#x27;review_body&#x27;</span>, <span class="hljs-string">&#x27;review_title&#x27;</span>, <span class="hljs-string">&#x27;language&#x27;</span>, <span class="hljs-string">&#x27;product_category&#x27;</span>],
num_rows: <span class="hljs-number">5000</span>
})
})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rkp0la">ご覧の通り、各言語の <code>train</code> 分割には 200,000 件のレビューがあり、 <code>validation</code> と <code>test</code> 分割にはそれぞれ 5,000 件のレビューがあります。私達が内容を知りたいレビュー情報は <code>review_body</code> と <code>review_title</code> カラムに含まれています。<a href="/course/ja/chapter5">第5章</a> で学んだ手法で、トレーニングセットからランダムにサンプルを取得する簡単な関数を作成し、いくつかの例を見てみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">show_samples</span>(<span class="hljs-params">dataset, num_samples=<span class="hljs-number">3</span>, seed=<span class="hljs-number">42</span></span>):
sample = dataset[<span class="hljs-string">&quot;train&quot;</span>].shuffle(seed=seed).select(<span class="hljs-built_in">range</span>(num_samples))
<span class="hljs-keyword">for</span> example <span class="hljs-keyword">in</span> sample:
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt; Title: <span class="hljs-subst">{example[<span class="hljs-string">&#x27;review_title&#x27;</span>]}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&#x27;&gt;&gt; Review: <span class="hljs-subst">{example[<span class="hljs-string">&#x27;review_body&#x27;</span>]}</span>&#x27;&quot;</span>)
show_samples(english_dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: Worked in front position, not rear&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: 3 stars because these are not rear brakes as stated in the item description. At least the mount adapter only worked on the front fork of the bike that I got it for.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: meh&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Does it’s job and it’s gorgeous but mine is falling apart, I had to basically put it together again with hot glue&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Can\&#x27;t beat these for the money&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Bought this for handling miscellaneous aircraft parts and hanger &quot;stuff&quot; that I needed to organize; it really fit the bill. The unit arrived quickly, was well packaged and arrived intact (always a good sign). There are five wall mounts-- three on the top and two on the bottom. I wanted to mount it on the wall, so all I had to do was to remove the top two layers of plastic drawers, as well as the bottom corner drawers, place it when I wanted and mark it; I then used some of the new plastic screw in wall anchors (the 50 pound variety) and it easily mounted to the wall. Some have remarked that they wanted dividers for the drawers, and that they made those. Good idea. My application was that I needed something that I can see the contents at about eye level, so I wanted the fuller-sized drawers. I also like that these are the new plastic that doesn\&#x27;t get brittle and split like my older plastic drawers did. I like the all-plastic construction. It\&#x27;s heavy duty enough to hold metal parts, but being made of plastic it\&#x27;s not as heavy as a metal frame, so you can easily mount it to the wall and still load it up with heavy stuff, or light stuff. No problem there. For the money, you can\&#x27;t beat it. 
Best one of these I\&#x27;ve bought to date-- and I\&#x27;ve been using some version of these for over forty years.&#x27;</span><!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-in0ogf">✏️ <strong>あなたの番です!</strong> <code>Dataset.shuffle()</code> コマンドのランダムシードを変更して、コーパスの他のレビューも調べてみてください。もしあなたがスペイン語を話せるなら、<code>spanish_dataset</code> にあるいくつかのレビューを見て、タイトルも妥当な要約に見えるかどうか確かめてみてください。</p></div> <p data-svelte-h="svelte-ocozdg">このサンプルは、肯定的なレビューから否定的なレビューまで(そしてその中間にある全てのレビュー!)、一般的にオンラインで見られるレビューの多様性を示しています。 「meh」というタイトルはあまり有益な情報を示すタイトルではありませんが、他のタイトルはレビュー自体の適切な要約のように見えます。40万件のレビューすべてについて要約モデルをトレーニングすることは、単一のGPUではあまりにも時間がかかりすぎるため、その代わりに、単一製品のドメインについて要約を生成することに焦点を当てます。どのようなドメインから選択できるかを知るために、<code>english_dataset</code> を <code>pandas.DataFrame</code> に変換して、製品カテゴリごとのレビュー数を計算してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute 
bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->english_dataset.set_format(<span class="hljs-string">&quot;pandas&quot;</span>)
english_df = english_dataset[<span class="hljs-string">&quot;train&quot;</span>][:]
<span class="hljs-comment"># Show counts for top 20 products</span>
english_df[<span class="hljs-string">&quot;product_category&quot;</span>].value_counts()[:<span class="hljs-number">20</span>]<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->home <span class="hljs-number">17679</span>
apparel <span class="hljs-number">15951</span>
wireless <span class="hljs-number">15717</span>
other <span class="hljs-number">13418</span>
beauty <span class="hljs-number">12091</span>
drugstore <span class="hljs-number">11730</span>
kitchen <span class="hljs-number">10382</span>
toy <span class="hljs-number">8745</span>
sports <span class="hljs-number">8277</span>
automotive <span class="hljs-number">7506</span>
lawn_and_garden <span class="hljs-number">7327</span>
home_improvement <span class="hljs-number">7136</span>
pet_products <span class="hljs-number">7082</span>
digital_ebook_purchase <span class="hljs-number">6749</span>
pc <span class="hljs-number">6401</span>
electronics <span class="hljs-number">6186</span>
office_product <span class="hljs-number">5521</span>
shoes <span class="hljs-number">5197</span>
grocery <span class="hljs-number">4730</span>
book <span class="hljs-number">3756</span>
Name: product_category, dtype: int64<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-xqrer3">英語のデータセットで最も人気のある商品は、家庭用品、衣類、ワイヤレス電子機器に関するものです。しかし、Amazon本来のテーマに沿って、書評の要約に焦点を当てましょう。結局のところ、書籍はこの会社が設立された際の商品なのです! 2つの製品カテゴリ(<code>book</code> と <code>digital_ebook_purchase</code>) が当てはまるので、これらの製品について両言語でデータセットをフィルタリングしてみましょう。<a href="/course/ja/chapter5">第5章</a> で見たように、 <code>Dataset.filter()</code> 関数を使うと非常に効率的にデータセットをスライスできるので、これを行うための簡単な関数を定義してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">filter_books</span>(<span class="hljs-params">example</span>):
<span class="hljs-keyword">return</span> (
example[<span class="hljs-string">&quot;product_category&quot;</span>] == <span class="hljs-string">&quot;book&quot;</span>
<span class="hljs-keyword">or</span> example[<span class="hljs-string">&quot;product_category&quot;</span>] == <span class="hljs-string">&quot;digital_ebook_purchase&quot;</span>
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1qptrsy">この関数を <code>english_dataset</code> と <code>spanish_dataset</code> に適用すると、書籍のカテゴリを含む行だけが結果に含まれるようになります。フィルタを適用する前に、<code>english_dataset</code> のフォーマットを <code>&quot;pandas&quot;</code> から <code>&quot;arrow&quot;</code> に戻してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->english_dataset.reset_format()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4pxau6">次に、フィルター機能を適用し、サニティーチェックとして、レビューのサンプルが本当に本に関するものかどうかを調べてみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->spanish_books = spanish_dataset.<span class="hljs-built_in">filter</span>(filter_books)
english_books = english_dataset.<span class="hljs-built_in">filter</span>(filter_books)
show_samples(english_books)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: I\&#x27;m dissapointed.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I guess I had higher expectations for this book from the reviews. I really thought I\&#x27;d at least like it. The plot idea was great. I loved Ash but, it just didnt go anywhere. Most of the book was about their radio show and talking to callers. I wanted the author to dig deeper so we could really get to know the characters. All we know about Grace is that she is attractive looking, Latino and is kind of a brat. I\&#x27;m dissapointed.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Good art, good price, poor design&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I had gotten the DC Vintage calendar the past two years, but it was on backorder forever this year and I saw they had shrunk the dimensions for no good reason. This one has good art choices but the design has the fold going through the picture, so it\&#x27;s less aesthetically pleasing, especially if you want to keep a picture to hang. For the price, a good calendar&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: Helpful&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Nearly all the tips useful and. I consider myself an intermediate to advanced user of OneNote. I would highly recommend.&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1axa9xf">レビューが厳密に本についてではなく、カレンダーやOneNoteのような電子アプリケーションのようなものを参照している可能性があることがわかります。それでも、このドメインは要約モデルを学習させるのに適していると思われます。このタスクに適した様々なモデルを見る前に、最後のデータ準備として、英語とスペイン語のレビューを1つの <code>DatasetDict</code> オブジェクトとして結合する必要があります。🤗 Datasetsには便利な <code>concatenate_datasets()</code> 関数があり、(その名の通り)2つの <code>Dataset</code> オブジェクトを重ね合わせることができます。つまり、バイリンガル・データセットを作成するために、各分割をループし、その分割データセットを連結し、モデルが単一言語に過剰適合しないように結果をシャッフルします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> concatenate_datasets, DatasetDict
books_dataset = DatasetDict()
<span class="hljs-keyword">for</span> split <span class="hljs-keyword">in</span> english_books.keys():
    books_dataset[split] = concatenate_datasets(
        [english_books[split], spanish_books[split]]
    )
    books_dataset[split] = books_dataset[split].shuffle(seed=<span class="hljs-number">42</span>)
<span class="hljs-comment"># Peek at a few examples</span>
show_samples(books_dataset)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt; Title: Easy to follow!!!!&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: I loved The dash diet weight loss Solution. Never hungry. I would recommend this diet. Also the menus are well rounded. Try it. Has lots of the information need thanks.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: PARCIALMENTE DAÑADO&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: Me llegó el día que tocaba, junto a otros libros que pedí, pero la caja llegó en mal estado lo cual dañó las esquinas de los libros porque venían sin protección (forro).&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Title: no lo he podido descargar&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt; Review: igual que el anterior&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sa4rm6">これは確かに英語とスペイン語のレビューが混在しているように見えますね!
さて、トレーニングコーパスができたので、最後にレビューとそのタイトルに含まれる単語の分布を確認します。これは要約タスクにおいて特に重要で、学習データとして参考にするデータ中に短すぎる要約が多いと、要約生成時に1つか2つの単語しか出力しないようモデルを偏らせる可能性があります。下のプロットは単語の分布を示しており、タイトルが1-2単語だけに大きく偏っていることがわかります。</p> <div class="flex justify-center" data-svelte-h="svelte-1pwnbfd"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/review-lengths.svg" alt="Word count distributions for the review titles and texts."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/review-lengths-dark.svg" alt="Word count distributions for the review titles and texts."></div> <p data-svelte-h="svelte-buc0xz">この問題に対処し、私達のモデルがより興味深い要約を生成できるように、非常に短いタイトルを持つ例をフィルタリングすることにします。英語とスペイン語のテキストを扱っているので、タイトルを空白で分割する大まかな経験則を元に、信頼できる <code>Dataset.filter()</code> メソッドを以下のように使用します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: 
transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->books_dataset = books_dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;review_title&quot;</span>].split()) &gt; <span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e5hqyz">さて、コーパスができたところで、このコーパスを使って、Transformerのモデルを微調整してみましょう。</p> <h2 class="relative group"><a id="文章要約用モデル" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#文章要約用モデル"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>文章要約用モデル</span></h2> <p data-svelte-h="svelte-6v0vm9">考えてみれば、文章の要約は機械翻訳と似たような種類のタスクです。レビューのようなテキスト入力があり、それを入力文内の顕著な特徴をとらえた短いバージョンに「翻訳」したいのです。したがって、要約のためのほとんどのTransformerモデルは<a href="/course/ja/chapter1">第1章</a>で最初に出会ったエンコーダとデコーダのアーキテクチャを採用しています。しかし、GPTモデル群のような例外もあり、少数ショット学習設定で使用することも可能です。以下の表は、要約のために微調整が可能な、よく使われる事前学習済みモデルの一覧です。</p> <table data-svelte-h="svelte-1oone28"><thead><tr><th align="center">Transformer model</th> <th>Description</th> <th 
align="center">Multilingual?</th></tr></thead> <tbody><tr><td align="center"><a href="https://huggingface.co/gpt2-xl" rel="nofollow">GPT-2</a></td> <td>Although trained as an auto-regressive language model, you can make GPT-2 generate summaries by appending “TL;DR” at the end of the input text.</td> <td align="center">❌</td></tr> <tr><td align="center"><a href="https://huggingface.co/google/pegasus-large" rel="nofollow">PEGASUS</a></td> <td>Uses a pretraining objective to predict masked sentences in multi-sentence texts. This pretraining objective is closer to summarization than vanilla language modeling and scores highly on popular benchmarks.</td> <td align="center">❌</td></tr> <tr><td align="center"><a href="https://huggingface.co/t5-base" rel="nofollow">T5</a></td> <td>A universal Transformer architecture that formulates all tasks in a text-to-text framework; e.g., the input format for the model to summarize a document is <code>summarize: ARTICLE</code>.</td> <td align="center">❌</td></tr> <tr><td align="center"><a href="https://huggingface.co/google/mt5-base" rel="nofollow">mT5</a></td> <td>A multilingual version of T5, pretrained on the multilingual Common Crawl corpus (mC4), covering 101 languages.</td> <td align="center">✅</td></tr> <tr><td align="center"><a href="https://huggingface.co/facebook/bart-base" rel="nofollow">BART</a></td> <td>A novel Transformer architecture with both an encoder and a decoder stack trained to reconstruct corrupted input that combines the pretraining schemes of BERT and GPT-2.</td> <td align="center">❌</td></tr> <tr><td align="center"><a href="https://huggingface.co/facebook/mbart-large-50" rel="nofollow">mBART-50</a></td> <td>A multilingual version of BART, pretrained on 50 languages.</td> <td align="center">✅</td></tr></tbody></table> <p 
data-svelte-h="svelte-1f9d46m">この表からわかるように、要約のためのTransformerモデルの大半は(そして実際、ほとんどのNLPタスクも)単言語版です。これはタスクが英語やドイツ語のような利用可能なデータの多い「高リソース」言語である場合は良いのですが、世界中で使われている何千もの他の言語ではそうではありません。幸いなことに、mT5やmBARTのような多言語Transformerモデルもあります。これらのモデルは言語モデリングを使って事前に学習されますが、ひねりが加えられています。1つの言語のコーパスで学習するのではなく、50以上の言語のテキストで一度に共同学習しているのです!</p> <p data-svelte-h="svelte-1d6grua">ここでは、T5をベースにテキストからテキストへのフレームワークで事前学習された興味深いアーキテクチャであるmT5に焦点を当てます。T5では、すべての自然言語処理タスクは「要約:」のようなプロンプト接頭辞で定式化され、生成されたテキストをプロンプトに適応させるようモデルに条件付けされます。下図に示すように、T5は非常に汎用性が高く、1つのモデルで多くのタスクを解決することができます!</p> <div class="flex justify-center" data-svelte-h="svelte-k7lnur"><img class="block dark:hidden" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/t5.svg" alt="Different tasks performed by the T5 architecture."> <img class="hidden dark:block" src="https://huggingface.co/datasets/huggingface-course/documentation-images/resolve/main/en/chapter7/t5-dark.svg" alt="Different tasks performed by the T5 architecture."></div> <p data-svelte-h="svelte-1rzgkca">mT5は接頭辞を使用しませんが、T5の多用途性を共有し、多言語であるという利点があります。さて、モデルを選んだところで、学習用のデータの準備に取りかかりましょう。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-804vx5">✏️ <strong>あなたの番です!</strong> このセクションを終えたら、同じ手法でmBARTを微調整して、mT5がmBARTと比較してどの程度優れているかを見てみましょう。ボーナスポイントとして、英語のレビューだけでT5を微調整してみることもできます。T5には特別な接頭辞プロンプトがあるので、以下の前処理ステップでは入力例の前に<code>summarize:</code>を付ける必要があります。</p></div> <h2 class="relative group"><a id="データの前処理" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#データの前処理"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" 
height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>データの前処理</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/1m7BerpSq8A" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-1f71lrt">次のタスクはレビューとそのタイトルをトークン化しエンコードすることです。いつものように、事前に学習したモデルのチェックポイントに関連付けられたトークナイザーをロードすることから始めます。ここではチェックポイントとして <code>mt5-small</code> を使用します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full 
transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer
model_checkpoint = <span class="hljs-string">&quot;google/mt5-small&quot;</span>
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-feg927">💡 NLPプロジェクトの初期段階では、「小さな」モデルのクラスを少量のデータサンプルで学習させるのがよい方法です。これにより、エンド・ツー・エンドのワークフローに向けたデバッグと反復をより速く行うことができます。結果に自信が持てたら、モデルのチェックポイントを変更するだけで、いつでもモデルをスケールアップすることができます。</p></div> <p data-svelte-h="svelte-1xrxxbn">少量のサンプルでmT5トークナイザーをテストしてみましょう</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->inputs = tokenizer(<span class="hljs-string">&quot;I loved reading the Hunger Games!&quot;</span>)
inputs<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;input_ids&#x27;</span>: [<span class="hljs-number">336</span>, <span class="hljs-number">259</span>, <span class="hljs-number">28387</span>, <span class="hljs-number">11807</span>, <span class="hljs-number">287</span>, <span class="hljs-number">62893</span>, <span class="hljs-number">295</span>, <span class="hljs-number">12507</span>, <span class="hljs-number">1</span>], <span class="hljs-string">&#x27;attention_mask&#x27;</span>: [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span 
class="hljs-number">1</span>, <span class="hljs-number">1</span>]}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1v3sify">ここで、<a href="/course/ja/chapter3">第3章</a> の最初の微調整の実験で遭遇した、おなじみの <code>input_ids</code> と <code>attention_mask</code> を見ることができます。これらの入力IDをトークナイザーの <code>convert_ids_to_tokens()</code> 関数でデコードして、どのようなトークナイザーなのかを見てみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenizer.convert_ids_to_tokens(inputs.input_ids)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" 
fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->[<span class="hljs-string">&#x27;▁I&#x27;</span>, <span class="hljs-string">&#x27;▁&#x27;</span>, <span class="hljs-string">&#x27;loved&#x27;</span>, <span class="hljs-string">&#x27;▁reading&#x27;</span>, <span class="hljs-string">&#x27;▁the&#x27;</span>, <span class="hljs-string">&#x27;▁Hung&#x27;</span>, <span class="hljs-string">&#x27;er&#x27;</span>, <span class="hljs-string">&#x27;▁Games&#x27;</span>, <span class="hljs-string">&#x27;&lt;/s&gt;&#x27;</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14jonz7">Unicodeの特殊文字 <code>▁</code> とシーケンスの終わりを意味するトークン <code>&lt;/s&gt;</code> は、SentencePieceトークナイザーを扱っていることを示しています。これは <a href="/course/ja/chapter6">第6章</a> で説明したユニグラムセグメント化アルゴリズムに基づいています。ユニグラムは多言語コーパスに特に有効です。ユニグラムによりSentencePieceはアクセント記号、句読点、空白などに依存しなくなるので、日本語のように空白文字を持たない多くの言語に対して効果的になります。</p> <p data-svelte-h="svelte-10oayda">このコーパスをトークン化するために、要約に関連する些細な問題に対処する必要があります。ラベルもテキストなので、モデルの最大コンテキストサイズを超える可能性があります。これは、レビューとそのタイトルの両方に切り詰めを適用して、過度に長い入力をモデルに渡さないようにする必要があることを意味します。🤗 Transformers のトークナイザーは、入力と並行してラベルをトークン化することができる便利な <code>as_target_tokenizer()</code> 
関数を提供します。これは通常、まず入力をエンコードし、次にラベルを別の列としてエンコードする前処理関数の内部で、コンテキストマネージャーを使用して行われます。</p> <p data-svelte-h="svelte-pnjhyc">以下は、mT5 用のそのような関数の例です。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->max_input_length = <span class="hljs-number">512</span>
max_target_length = <span class="hljs-number">30</span>
<span class="hljs-keyword">def</span> <span class="hljs-title function_">preprocess_function</span>(<span class="hljs-params">examples</span>):
    model_inputs = tokenizer(
        examples[<span class="hljs-string">&quot;review_body&quot;</span>], max_length=max_input_length, truncation=<span class="hljs-literal">True</span>
    )
    <span class="hljs-comment"># Set up the tokenizer for targets</span>
    <span class="hljs-keyword">with</span> tokenizer.as_target_tokenizer():
        labels = tokenizer(
            examples[<span class="hljs-string">&quot;review_title&quot;</span>], max_length=max_target_length, truncation=<span class="hljs-literal">True</span>
        )
    model_inputs[<span class="hljs-string">&quot;labels&quot;</span>] = labels[<span class="hljs-string">&quot;input_ids&quot;</span>]
    <span class="hljs-keyword">return</span> model_inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-940boc">何が起こっているのか理解するために、このコードを見ていきましょう。まず最初に、<code>max_input_length</code> と <code>max_target_length</code> の値を定義しました。これはレビューとタイトルの長さの上限を設定するものです。通常、レビューの本文はタイトルよりもはるかに大きいので、これらの値を適宜スケーリングしています。次に、<code>preprocess_function()</code> 自身で、レビューが最初にトークン化され、次に <code>as_target_tokenizer()</code> でタイトルがトークン化されていることがわかります。</p> <p data-svelte-h="svelte-1owvo6e"><code>preprocess_function()</code> があれば、あとはこのコースで散々使ってきた便利な <code>Dataset.map()</code> 関数を使ってコーパス全体をトークン化するのは簡単なことです。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenized_datasets = books_dataset.<span class="hljs-built_in">map</span>(preprocess_function, batched=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p 
data-svelte-h="svelte-9936gc">さて、コーパスの前処理が終わったところで、要約によく使われるいくつかの指標を見てみましょう。これから見るように、機械が生成した文章の品質を測る際に、万能の手法は存在しません。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1t3m4qq">💡 上の <code>Dataset.map()</code> 関数で <code>batched=True</code> を使っていることにお気づきかもしれません。これはサンプルを1,000のバッチ(デフォルト)でエンコードし、🤗 Transformersの高速トークナイザーが持つマルチスレッド機能を利用できるようにするものです。可能であれば、前処理を最大限に活用するために <code>batched=True</code> を使ってみてください!</p></div> <h2 class="relative group"><a id="文章要約のための指標" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#文章要約のための指標"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>文章要約のための指標</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/TMshhnrEXlg" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p 
data-svelte-h="svelte-1xmcc0c">このコースで取り上げた他のほとんどのタスクと比較して、要約や翻訳のようなテキスト生成タスクの性能測定はそれほど簡単ではありません。例えば、「ハンガーゲームを読むのが好きだ」というレビューがあったとして、「ハンガーゲームが大好きだ」「ハンガーゲームは素晴らしい読み物だ」など、有効な要約が複数存在します。明らかに、生成された要約とラベルの間にある種の完全な一致を適用することは良い解決策ではありません。私たちは皆、独自の文体を持っているので、そのような測定指標を用いては人間でさえうまくいかないでしょう。</p> <p data-svelte-h="svelte-hkir7g">要約のために、最もよく使われる指標の1つが<a href="https://en.wikipedia.org/wiki/ROUGE_(metric)" rel="nofollow">ROUGE score</a> (Recall-Oriented Understudy for Gisting Evaluationの略)です。この指標の背後にある基本的な考え方は、生成された要約を、通常人間が作成した参照要約のセットと比較することです。これをより正確にするために、次の2つの要約を比較したいとします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->generated_summary = <span class="hljs-string">&quot;I absolutely loved reading the Hunger Games&quot;</span>
reference_summary = <span class="hljs-string">&quot;I loved reading the Hunger Games&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-oy7jfx">比較する一つの方法として、重複している単語の数を数えることが考えられますが、この場合、6個となります。しかし、これは少し粗いので、代わりにROUGEは重なり合った部分の <em>適合率</em>と<em>再現率</em> のスコアを計算することを基本としています。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1x0nzu3">🙋 もしあなたが適合率や再現率について初めて聞いたとしても心配しないでください。すべてを明らかにするために、いくつかの明確な例を一緒に見ていきましょう。これらの指標は通常分類タスクで遭遇するので、その分類タスクの場合に適合率と再現率がどのように定義されているかを理解したい場合は、 <code>scikit-learn</code> <a href="https://scikit-learn.org/stable/auto_examples/model_selection/plot_precision_recall.html" rel="nofollow">guides</a> をチェックアウトすることをお勧めします。</p></div> <p>ROUGEでは、生成した要約に参照元の要約がどれだけ取り込まれたかを再現率で測定します。単語を比較するだけであれば、以下の式によって再現率を計算することができます。
<!-- HTML_TAG_START --><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mrow><mi mathvariant="normal">R</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">l</mi></mrow><mo>=</mo><mfrac><mrow><mi mathvariant="normal">N</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">v</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">g</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi></mrow><mrow><mi mathvariant="normal">T</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mtext></mtext><mi mathvariant="normal">n</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi><mtext></mtext><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mtext></mtext><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">f</mi><mi 
mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">e</mi><mtext></mtext><mi mathvariant="normal">s</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">y</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex"> \mathrm{Recall} = \frac{\mathrm{Number\,of\,overlapping\, words}}{\mathrm{Total\, number\, of\, words\, in\, reference\, summary}} </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6944em;"></span><span class="mord"><span class="mord mathrm">Recall</span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:2.2519em;vertical-align:-0.8804em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.3714em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Total</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">in</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">reference</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord 
mathrm" style="margin-right:0.01389em;">summary</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">overlapping</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.8804em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span></span><!-- HTML_TAG_END --></p> <p>上記の簡単な例では、この式は6/6 = 1の完全な再現率を与えます。つまり、参照要約のすべての単語がモデルによって生成されたことになります。これは素晴らしいことだと思うかもしれませんが、もし私達のモデルが生成した要約が「ハンガーゲームを一晩中読むのが本当に本当に好きだった」であったとしたらどうでしょう。この場合も完璧な再現率が得られますが、冗長であるため、間違いなくより悪い要約となります。このようなシナリオに対処するために、私達は適合率も計算します。これはROUGEの文脈において、生成された要約がどれだけ関連していたかを測定するものです。
<!-- HTML_TAG_START --><span class="katex-display"><span class="katex"><span class="katex-mathml"><math xmlns="http://www.w3.org/1998/Math/MathML" display="block"><semantics><mrow><mrow><mi mathvariant="normal">P</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">c</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">s</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">n</mi></mrow><mo>=</mo><mfrac><mrow><mi mathvariant="normal">N</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">v</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">l</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">p</mi><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">g</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi></mrow><mrow><mi mathvariant="normal">T</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">l</mi><mtext></mtext><mi mathvariant="normal">n</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">b</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mtext></mtext><mi mathvariant="normal">o</mi><mi mathvariant="normal">f</mi><mtext></mtext><mi mathvariant="normal">w</mi><mi mathvariant="normal">o</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">d</mi><mi mathvariant="normal">s</mi><mtext></mtext><mi mathvariant="normal">i</mi><mi mathvariant="normal">n</mi><mtext></mtext><mi 
mathvariant="normal">g</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">n</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">t</mi><mi mathvariant="normal">e</mi><mi mathvariant="normal">d</mi><mtext></mtext><mi mathvariant="normal">s</mi><mi mathvariant="normal">u</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">m</mi><mi mathvariant="normal">a</mi><mi mathvariant="normal">r</mi><mi mathvariant="normal">y</mi></mrow></mfrac></mrow><annotation encoding="application/x-tex"> \mathrm{Precision} = \frac{\mathrm{Number\,of\,overlapping\, words}}{\mathrm{Total\, number\, of\, words\, in\, generated\, summary}} </annotation></semantics></math></span><span class="katex-html" aria-hidden="true"><span class="base"><span class="strut" style="height:0.6833em;"></span><span class="mord"><span class="mord mathrm">Precision</span></span><span class="mspace" style="margin-right:0.2778em;"></span><span class="mrel">=</span><span class="mspace" style="margin-right:0.2778em;"></span></span><span class="base"><span class="strut" style="height:2.2519em;vertical-align:-0.8804em;"></span><span class="mord"><span class="mopen nulldelimiter"></span><span class="mfrac"><span class="vlist-t vlist-t2"><span class="vlist-r"><span class="vlist" style="height:1.3714em;"><span style="top:-2.314em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Total</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">in</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord 
mathrm">generated</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">summary</span></span></span></span><span style="top:-3.23em;"><span class="pstrut" style="height:3em;"></span><span class="frac-line" style="border-bottom-width:0.04em;"></span></span><span style="top:-3.677em;"><span class="pstrut" style="height:3em;"></span><span class="mord"><span class="mord"><span class="mord mathrm">Number</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.07778em;">of</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm" style="margin-right:0.01389em;">overlapping</span><span class="mspace" style="margin-right:0.1667em;"></span><span class="mord mathrm">words</span></span></span></span></span><span class="vlist-s">​</span></span><span class="vlist-r"><span class="vlist" style="height:0.8804em;"><span></span></span></span></span></span><span class="mclose nulldelimiter"></span></span></span></span></span></span><!-- HTML_TAG_END --></p> <p data-svelte-h="svelte-grwz7e">これを冗長な要約に適用すると、適合率は6/10 = 0.6となり、短い要約で得られた6/7 = 0.86よりもかなり悪くなります。実際には、通常、適合率と再現率の両方が計算され、そして、F1スコア(適合率と再現率の調和平均)が報告されます。これは🤗 Datasetsで、まず <code>rouge_score</code> パッケージをインストールすることで簡単に行うことができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path
d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install rouge_score<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-184bhe5">そして、ROUGE指標を読み込みます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> evaluate
rouge_score = evaluate.load(<span class="hljs-string">&quot;rouge&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1aryl6q">そして、<code>rouge_score.compute()</code>関数を使って、すべての指標を一度に計算することができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->scores = rouge_score.compute(
predictions=[generated_summary], references=[reference_summary]
)
scores<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;rouge1&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)),
<span class="hljs-string">&#x27;rouge2&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>), mid=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>), high=Score(precision=<span class="hljs-number">0.67</span>, recall=<span class="hljs-number">0.8</span>, fmeasure=<span class="hljs-number">0.73</span>)),
<span class="hljs-string">&#x27;rougeL&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)),
<span class="hljs-string">&#x27;rougeLsum&#x27;</span>: AggregateScore(low=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), mid=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>), high=Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>))}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1o8gzhy">おっと、この出力には多くの情報が含まれていますね。</p> <p data-svelte-h="svelte-1pt66j7">これは全て何を意味するのでしょうか?まず、🤗 Datasetsは適合率、再現率、F1スコアの信頼区間を計算します。これらはここに表示されている <code>low</code>、<code>mid</code>、<code>high</code> の属性です。さらに、🤗 Datasetsは生成された要約と参照された要約を比較する際に、異なるタイプのテキストの粒度に基づいた様々なROUGEスコアを計算します。<code>rouge1</code>のバリエーションはユニグラムの重なり具合です。これは単語のオーバーラップを言い換えただけのもので、まさに上で説明したような指標です。これを確認するために、スコアの <code>mid</code> 値を引き出してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2
transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->scores[<span class="hljs-string">&quot;rouge1&quot;</span>].mid<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Score(precision=<span class="hljs-number">0.86</span>, recall=<span class="hljs-number">1.0</span>, fmeasure=<span class="hljs-number">0.92</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-he51v6">素晴らしい!適合率と再現率の数値が一致しました。では、他のROUGEスコアについてはどうでしょうか?
<code>rouge2</code> はビッグラム(単語のペアの重なり)の重なりを測定し、 <code>rougeL</code> と <code>rougeLsum</code> は生成されたサマリーと参照サマリーで最も長い共通部分文字列を探して、最も長くマッチする単語列を測定します。<code>rougeLsum</code> の “sum” は、 <code>rougeL</code> が個々の文の平均値として計算されるのに対し、この指標は要約全体に対して計算されるという事実を表しています。</p> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-1w3lu3k">✏️ <strong>あなたの番です!</strong> 生成と参照要約の独自の例を作成し、結果のROUGEスコアが適合率と再現率の公式を基にした手動計算と一致するかどうかを確認することができます。ボーナスポイントとして、テキストをビッグラムに分割し、<code>rouge2</code> 指標の適合率と再現率を比較します。</p></div> <p data-svelte-h="svelte-1f1r1g8">このROUGEスコアを使ってモデルのパフォーマンスを追跡していきますが、その前に優れたNLP実践者がすべきこと、それは強力かつシンプルなベースラインを作成することです。</p> <h3 class="relative group"><a id="強力なベースラインの作成" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#強力なベースラインの作成"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>強力なベースラインの作成</span></h3> <p data-svelte-h="svelte-1owpy0q">テキスト要約の一般的なベースラインは、単純に記事の最初の3つのセンテンスを取ることで、しばしば <em>lead-3</em>
ベースラインと呼ばれます。文の境界を追跡するためにピリオドを使うこともできますが、このやり方は “U.S.” や “U.N.” のような頭字語では失敗します。そこで、このようなケースを処理するための優れたアルゴリズムを含む <code>nltk</code> ライブラリを使用することにします。このパッケージは、以下のように <code>pip</code> を用いてインストールすることができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->!pip install nltk<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-18h1szi">そして、句読点規則をダウンロードしてください。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> nltk
nltk.download(<span class="hljs-string">&quot;punkt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1dq33gu">次に、<code>nltk</code>からセンテンストークナイザーをインポートし、レビューの最初の3文を抽出する簡単な関数を作成します。テキストの要約では、各要約を改行で区切るのが慣例なので、これも含めて学習例でテストしてみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> nltk.tokenize <span class="hljs-keyword">import</span> sent_tokenize
<span class="hljs-keyword">def</span> <span class="hljs-title function_">three_sentence_summary</span>(<span class="hljs-params">text</span>):
<span class="hljs-keyword">return</span> <span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(text)[:<span class="hljs-number">3</span>])
<span class="hljs-built_in">print</span>(three_sentence_summary(books_dataset[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">1</span>][<span class="hljs-string">&quot;review_body&quot;</span>]))<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;I grew up reading Koontz, and years ago, I stopped,convinced i had &quot;outgrown&quot; him.&#x27;</span>
<span class="hljs-string">&#x27;Still,when a friend was looking for something suspenseful too read, I suggested Koontz.&#x27;</span>
<span class="hljs-string">&#x27;She found Strangers.&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-wnmvqs">これはうまくいきそうなので、今度はデータセットからこれらの「要約」を抽出し、ベースラインのROUGEスコアを計算する関数を実装してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">evaluate_baseline</span>(<span class="hljs-params">dataset, metric</span>):
summaries = [three_sentence_summary(text) <span class="hljs-keyword">for</span> text <span class="hljs-keyword">in</span> dataset[<span class="hljs-string">&quot;review_body&quot;</span>]]
<span class="hljs-keyword">return</span> metric.compute(predictions=summaries, references=dataset[<span class="hljs-string">&quot;review_title&quot;</span>])<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1252ppu">そして、この関数を使って検証セットのROUGEスコアを計算し、Pandasを使って少しきれいにすることができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> pandas <span class="hljs-keyword">as</span> pd
score = evaluate_baseline(books_dataset[<span class="hljs-string">&quot;validation&quot;</span>], rouge_score)
rouge_names = [<span class="hljs-string">&quot;rouge1&quot;</span>, <span class="hljs-string">&quot;rouge2&quot;</span>, <span class="hljs-string">&quot;rougeL&quot;</span>, <span class="hljs-string">&quot;rougeLsum&quot;</span>]
rouge_dict = <span class="hljs-built_in">dict</span>((rn, <span class="hljs-built_in">round</span>(score[rn].mid.fmeasure * <span class="hljs-number">100</span>, <span class="hljs-number">2</span>)) <span class="hljs-keyword">for</span> rn <span class="hljs-keyword">in</span> rouge_names)
rouge_dict<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">16.74</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">8.83</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">15.6</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">15.96</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-14f8lff"><code>rouge2</code> のスコアが他よりかなり低いことがわかります。これは、レビューのタイトルが一般的に簡潔であるため、lead-3のベースラインが冗長すぎるという事実を反映していると思われます。これでベースラインができたので、次はmT5の微調整を行います!</p> <h2 class="relative group"><a id="trainer-api-を使って-mt5-を微調整する" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 
with-hover:group-hover:opacity-100 with-hover:right-full" href="#trainer-api-を使って-mt5-を微調整する"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>Trainer API を使って mT5 を微調整する</span></h2> <p data-svelte-h="svelte-1lhwse">要約のためのモデルの微調整は、この章で取り上げた他のタスクと非常によく似ています。まず最初に行うべきことは、<code>mt5-small</code> チェックポイントから事前学習したモデルをロードすることです。要約はシーケンス間タスクなので、<code>AutoModelForSeq2SeqLM</code> クラスを使用してモデルをロードすることができます。これは自動的に重みをダウンロードし、キャッシュします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 
leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForSeq2SeqLM
model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-xg9aw0">💡 下流のタスクでモデルを微調整することに関する警告が表示されないことを不思議に思うかもしれませんが、それはシーケンス間タスクでは、ネットワークのすべての重みが保持されるからです。これを<a href="/course/ja/chapter3">第3章</a>のテキスト分類モデルと比較してみましょう。テキスト分類モデルでは、事前学習したモデルのヘッドをランダムに初期化したネットワークに置き換えています。</p></div> <p data-svelte-h="svelte-1xii4k">次に必要なのは、Hugging Face Hub にログインすることです。このコードをノートブックで実行する場合は、次のユーティリティ関数で実行できます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
notebook_login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9qe1fg">これにより、ウィジェットが表示され、認証情報を入力することができます。または、ターミナルで以下のコマンドを実行し、ログインすることもできます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->huggingface-<span class="hljs-keyword">cli</span> login<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1p2og3r">トレーニング中にROUGEスコアを計算するために、要約を生成する必要があります。幸いなことに、🤗 Transformersは専用の <code>Seq2SeqTrainingArguments</code> と <code>Seq2SeqTrainer</code> クラスを提供し、私たちのために自動的にこれを行うことができます!
このクラスがどのように機能するかを見てみるために、まず実験用にハイパーパラメータとその他の引数を定義しましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Seq2SeqTrainingArguments
batch_size = <span class="hljs-number">8</span>
num_train_epochs = <span class="hljs-number">8</span>
<span class="hljs-comment"># Show the training loss with every epoch</span>
logging_steps = <span class="hljs-built_in">len</span>(tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>]) // batch_size
model_name = model_checkpoint.split(<span class="hljs-string">&quot;/&quot;</span>)[-<span class="hljs-number">1</span>]
args = Seq2SeqTrainingArguments(
output_dir=<span class="hljs-string">f&quot;<span class="hljs-subst">{model_name}</span>-finetuned-amazon-en-es&quot;</span>,
evaluation_strategy=<span class="hljs-string">&quot;epoch&quot;</span>,
learning_rate=<span class="hljs-number">5.6e-5</span>,
per_device_train_batch_size=batch_size,
per_device_eval_batch_size=batch_size,
weight_decay=<span class="hljs-number">0.01</span>,
save_total_limit=<span class="hljs-number">3</span>,
num_train_epochs=num_train_epochs,
predict_with_generate=<span class="hljs-literal">True</span>,
logging_steps=logging_steps,
push_to_hub=<span class="hljs-literal">True</span>,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1d453h">ここでは、 <code>predict_with_generate</code> 引数を設定し、各エポックの ROUGE スコアを計算できるように、評価中に要約を生成するように指示しました。<a href="/course/ja/chapter1">第1章</a> で説明したように、デコーダはトークンを一つずつ予測して推論を行いますが、これはモデルの <code>generate()</code> メソッドによって実装されています。<code>predict_with_generate=True</code> を設定すると、 <code>Seq2SeqTrainer</code> がそのメソッドを使用して評価を行うようになります。また、学習率、エポック回数、重み減衰などのデフォルトのハイパーパラメータを調整し、 <code>save_total_limit</code> オプションを設定して、学習中のチェックポイントを3つまでしか保存しないようにしました。これはmT5の「小さい」バージョンでさえ、ハードディスクの容量を約1GB使用しており、保存するコピーを制限すれば、少し容量を節約することができるからです。</p> <p data-svelte-h="svelte-8wxcwv"><code>push_to_hub=True</code> を指定すると、学習後にモデルを Hub にプッシュすることができます。ユーザープロファイルの下の、 <code>output_dir</code> で定義された場所にリポジトリが作成されます。なお、 <code>hub_model_id</code> 引数で、プッシュしたいリポジトリの名前を指定することができます。(特に、組織にプッシュする場合はこの引数を使用する必要があります)。例えば、モデルを <a href="https://huggingface.co/huggingface-course" rel="nofollow"><code>huggingface-course</code> organization</a> にプッシュする場合、<code>Seq2SeqTrainingArguments</code> に <code>hub_model_id=&quot;huggingface-course/mt5-finetuned-amazon-en-es&quot;</code> を追加しています。</p> <p data-svelte-h="svelte-9fc8uz">次に必要なことは、学習中にモデルを評価できるように、 <code>compute_metrics()</code> 関数をトレーナーに提供することです。
要約タスクでは、予測タスクのようにシンプルに <code>rouge_score.compute()</code> を呼ぶのと少し異なります。なぜなら、ROUGE スコアを計算する前に、出力とラベルをテキストにデコードする必要があるからです。以下の関数はまさにそれを行うもので、さらに <code>nltk</code> の <code>sent_tokenize()</code> 関数を利用して、要約文章を改行で区切るようにしています。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
<span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_metrics</span>(<span class="hljs-params">eval_pred</span>):
predictions, labels = eval_pred
<span class="hljs-comment"># Decode generated summaries into text</span>
decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># Replace -100 in the labels as we can&#x27;t decode them</span>
labels = np.where(labels != -<span class="hljs-number">100</span>, labels, tokenizer.pad_token_id)
<span class="hljs-comment"># Decode reference summaries into text</span>
decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-comment"># ROUGE expects a newline after each sentence</span>
decoded_preds = [<span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(pred.strip())) <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> decoded_preds]
decoded_labels = [<span class="hljs-string">&quot;\n&quot;</span>.join(sent_tokenize(label.strip())) <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> decoded_labels]
<span class="hljs-comment"># Compute ROUGE scores</span>
result = rouge_score.compute(
predictions=decoded_preds, references=decoded_labels, use_stemmer=<span class="hljs-literal">True</span>
)
<span class="hljs-comment"># Extract the median scores</span>
result = {key: value.mid.fmeasure * <span class="hljs-number">100</span> <span class="hljs-keyword">for</span> key, value <span class="hljs-keyword">in</span> result.items()}
<span class="hljs-keyword">return</span> {k: <span class="hljs-built_in">round</span>(v, <span class="hljs-number">4</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> result.items()}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-16ic40b">次に、シーケンス間タスクのためのデータコレーターを定義する必要があります。mT5はエンコーダ・デコーダのTransformerモデルなので、バッチを準備する際の一つのちょっとした差異は、デコード中にラベルを右に1つシフトする必要があることです。これは、デコーダが以前の真実のラベルしか見ないようにするためで、現在や将来のラベルをモデルに記憶させないようにします。これは<a href="/course/ja/chapter7/6">因果言語モデリング</a>のようなタスクでマスクされた自己注意が入力に適用される方法に似ています。</p> <p data-svelte-h="svelte-1001k8h">幸運なことに、🤗 Transformers は <code>DataCollatorForSeq2Seq</code> コレーターを提供し、入力とラベルを動的にパディングしてくれます。このコレーターをインスタンス化するには、単に <code>tokenizer</code> と <code>model</code> を提供する必要があります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForSeq2Seq
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-5tw7o6">それでは、このコレーターが少量のサンプルをバッチで与えたときに何を生成するかを見てみましょう。まず、文字列を含む列を削除する必要があります。コレーターはこれらの要素をどのようにパディングするかを知らないからです。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->tokenized_datasets = tokenized_datasets.remove_columns(
books_dataset[<span class="hljs-string">&quot;train&quot;</span>].column_names
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ypyn2c">コレーター は <code>dict</code> のリストを受け取り、各 <code>dict</code> はデータセット内の 1 つの例を表している事を期待しています。したがって、データをコレーターに渡す前に期待通りの形式に変換する必要があります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->features = [tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>][i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-number">2</span>)]
data_collator(features)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span class="hljs-string">&#x27;attention_mask&#x27;</span>: tensor([[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span 
class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>,
<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>,
<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;input_ids&#x27;</span>: tensor([[ <span class="hljs-number">1494</span>, <span class="hljs-number">259</span>, <span class="hljs-number">8622</span>, <span class="hljs-number">390</span>, <span class="hljs-number">259</span>, <span class="hljs-number">262</span>, <span class="hljs-number">2316</span>, <span class="hljs-number">3435</span>, <span class="hljs-number">955</span>,
<span class="hljs-number">772</span>, <span class="hljs-number">281</span>, <span class="hljs-number">772</span>, <span class="hljs-number">1617</span>, <span class="hljs-number">263</span>, <span class="hljs-number">305</span>, <span class="hljs-number">14701</span>, <span class="hljs-number">260</span>, <span class="hljs-number">1385</span>,
<span class="hljs-number">3031</span>, <span class="hljs-number">259</span>, <span class="hljs-number">24146</span>, <span class="hljs-number">332</span>, <span class="hljs-number">1037</span>, <span class="hljs-number">259</span>, <span class="hljs-number">43906</span>, <span class="hljs-number">305</span>, <span class="hljs-number">336</span>,
<span class="hljs-number">260</span>, <span class="hljs-number">1</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>],
[ <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>, <span class="hljs-number">260</span>, <span class="hljs-number">112240</span>, <span class="hljs-number">15192</span>, <span class="hljs-number">305</span>,
<span class="hljs-number">53198</span>, <span class="hljs-number">276</span>, <span class="hljs-number">259</span>, <span class="hljs-number">74060</span>, <span class="hljs-number">263</span>, <span class="hljs-number">260</span>, <span class="hljs-number">459</span>, <span class="hljs-number">25640</span>, <span class="hljs-number">776</span>,
<span class="hljs-number">2119</span>, <span class="hljs-number">336</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2220</span>, <span class="hljs-number">259</span>, <span class="hljs-number">18896</span>, <span class="hljs-number">288</span>, <span class="hljs-number">4906</span>, <span class="hljs-number">288</span>,
<span class="hljs-number">1037</span>, <span class="hljs-number">3931</span>, <span class="hljs-number">260</span>, <span class="hljs-number">7083</span>, <span class="hljs-number">101476</span>, <span class="hljs-number">1143</span>, <span class="hljs-number">260</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;labels&#x27;</span>: tensor([[ <span class="hljs-number">7483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2364</span>, <span class="hljs-number">15695</span>, <span class="hljs-number">1</span>, -<span class="hljs-number">100</span>],
[ <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>, <span class="hljs-number">1</span>]]), <span class="hljs-string">&#x27;decoder_input_ids&#x27;</span>: tensor([[ <span class="hljs-number">0</span>, <span class="hljs-number">7483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">2364</span>, <span class="hljs-number">15695</span>, <span class="hljs-number">1</span>],
[ <span class="hljs-number">0</span>, <span class="hljs-number">259</span>, <span class="hljs-number">27531</span>, <span class="hljs-number">13483</span>, <span class="hljs-number">259</span>, <span class="hljs-number">7505</span>]])}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1o4z71f">ここで注目すべきは、2番目の例は最初の例よりも長いので、最初の例の <code>input_ids</code> と <code>attention_mask</code> は右側に <code>[PAD]</code> トークン (ID は <code>0</code>) でパディングされていることです。同様に、<code>labels</code> は <code>-100</code> でパディングされていることがわかります。これは、パディングトークンが損失関数によって無視されるようにするためです。そして最後に、新しい <code>decoder_input_ids</code> を見ると、最初のエントリに <code>[PAD]</code> トークンを挿入してラベルを右にシフトしていることが確認できます。</p> <p data-svelte-h="svelte-1sx6091">これでようやく、トレーニングに必要な材料が揃いました。あとは、標準的な引数でトレーナーを実体化するだけです。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span 
class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Seq2SeqTrainer
trainer = Seq2SeqTrainer(
model,
args,
train_dataset=tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>],
eval_dataset=tokenized_datasets[<span class="hljs-string">&quot;validation&quot;</span>],
data_collator=data_collator,
tokenizer=tokenizer,
compute_metrics=compute_metrics,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gsr0uj">そして、トレーニングランを開始します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-sbqohp">学習中はエポック毎に学習損失が減少し、ROUGE スコアが増加するのが分かるはずです。学習が完了したら、<code>Trainer.evaluate()</code>を実行して最終的な ROUGE スコアを確認することができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.evaluate()<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->{<span 
class="hljs-string">&#x27;eval_loss&#x27;</span>: <span class="hljs-number">3.028524398803711</span>,
<span class="hljs-string">&#x27;eval_rouge1&#x27;</span>: <span class="hljs-number">16.9728</span>,
<span class="hljs-string">&#x27;eval_rouge2&#x27;</span>: <span class="hljs-number">8.2969</span>,
<span class="hljs-string">&#x27;eval_rougeL&#x27;</span>: <span class="hljs-number">16.8366</span>,
<span class="hljs-string">&#x27;eval_rougeLsum&#x27;</span>: <span class="hljs-number">16.851</span>,
<span class="hljs-string">&#x27;eval_gen_len&#x27;</span>: <span class="hljs-number">10.1597</span>,
<span class="hljs-string">&#x27;eval_runtime&#x27;</span>: <span class="hljs-number">6.1054</span>,
<span class="hljs-string">&#x27;eval_samples_per_second&#x27;</span>: <span class="hljs-number">38.982</span>,
<span class="hljs-string">&#x27;eval_steps_per_second&#x27;</span>: <span class="hljs-number">4.914</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-19uslc0">スコアから、私達のモデルがlead-3のベースラインを見事に上回ったことがわかります。いいですね!
最後に、以下のようにモデルの重みをハブにプッシュします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->trainer.push_to_hub(<span class="hljs-attribute">commit_message</span>=<span class="hljs-string">&quot;Training complete&quot;</span>, <span class="hljs-attribute">tags</span>=<span class="hljs-string">&quot;summarization&quot;</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 
32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;https://huggingface.co/huggingface-course/mt5-finetuned-amazon-en-es/commit/aa0536b829b28e73e1e4b94b8a5aacec420d40e0&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1k7jr0c">これは、ハブにすべてのファイルをアップロードする前に、チェックポイントと設定ファイルを <code>output_dir</code> に保存するものです。引数に <code>tags</code> を指定することで、Hub 上のウィジェットが mT5 アーキテクチャに関連付けられたデフォルトのテキスト生成用ではなく、要約パイプライン用のものになることも確認できます (モデルタグに関する詳細については、 <a href="https://huggingface.co/docs/hub/main#how-is-a-models-type-of-inference-api-and-widget-determined" rel="nofollow">🤗 Hub documentation</a>を参照してください)。
<code>trainer.push_to_hub()</code> の出力は Git のコミットハッシュへの URL で、モデルリポジトリに加えられた変更を簡単に確認することができます!</p> <p data-svelte-h="svelte-u549u">このセクションの最後に、🤗 Accelerate が提供する低レベルの機能を使って mT5 を微調整することもできる方法を見てみましょう。</p> <h2 class="relative group"><a id="mt5モデルを--accelerate-を使って微調整する" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mt5モデルを--accelerate-を使って微調整する"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>mT5モデルを 🤗 Accelerate を使って微調整する</span></h2> <p data-svelte-h="svelte-1vkiikr">🤗 Accelerateを使ったモデルの微調整は、<a href="/course/ja/chapter3">第3章</a>で行ったテキスト分類の例と非常によく似ています。主な違いは、学習時に要約を明示的に生成する必要があることと、ROUGEスコアの計算方法を定義することです(<code>Seq2SeqTrainer</code>が生成の面倒をみてくれたことを思い出してください)。では、この2つの要件を🤗 Accelerateでどのように実装するか見てみましょう。</p> <h3 class="relative group"><a id="トレーニングのための準備" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#トレーニングのための準備"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>トレーニングのための準備</span></h3> <p data-svelte-h="svelte-7tqvsm">まず最初に行うべきことは、各分割に対して <code>DataLoader</code> を作成することです。PyTorchのデータローダーはテンソルのバッチを想定しているので、データセットのフォーマットを <code>&quot;torch&quot;</code> に設定する必要があります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START 
-->tokenized_datasets.set_format(<span class="hljs-string">&quot;torch&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-tier4v">これでテンソルだけのデータセットができたので、次にやることは <code>DataCollatorForSeq2Seq</code> を再び実体化することです。そのためには、新しいバージョンのモデルを用意する必要があるので、キャッシュからロードし直しましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-pus0vm">次に、データコレーターを実体化し、これを使用してデータローダーを定義します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.utils.data <span class="hljs-keyword">import</span> DataLoader
batch_size = <span class="hljs-number">8</span>
train_dataloader = DataLoader(
tokenized_datasets[<span class="hljs-string">&quot;train&quot;</span>],
shuffle=<span class="hljs-literal">True</span>,
collate_fn=data_collator,
batch_size=batch_size,
)
eval_dataloader = DataLoader(
tokenized_datasets[<span class="hljs-string">&quot;validation&quot;</span>], collate_fn=data_collator, batch_size=batch_size
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-157xrx7">次に行うことは、使用するオプティマイザーを定義することです。他の例と同様に、ほとんどの問題でうまく機能する <code>AdamW</code> を使用することにします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> torch.optim <span class="hljs-keyword">import</span> AdamW
optimizer = AdamW(model.parameters(), lr=<span class="hljs-number">2e-5</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-cliuhs">最後に、モデル、オプティマイザー、データローダーを <code>accelerator.prepare()</code> メソッドに渡します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> accelerate <span class="hljs-keyword">import</span> Accelerator
accelerator = Accelerator()
model, optimizer, train_dataloader, eval_dataloader = accelerator.prepare(
model, optimizer, train_dataloader, eval_dataloader
)<!-- HTML_TAG_END --></pre></div> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-tre0vw">🚨 TPUでトレーニングする場合は、上記のコードをすべて専用のトレーニング関数に移動する必要があります。詳しくは<a href="/course/ja/chapter3">第3章</a>を参照してください。</p></div> <p data-svelte-h="svelte-1i58yt8">さて、オブジェクトの準備ができたので、残すは3つです。</p> <ul data-svelte-h="svelte-ux6piu"><li>学習率のスケジュールを定義する。</li> <li>評価用の要約を後処理する関数を実装する。</li> <li>ハブ上にモデルをプッシュできるリポジトリを作成する。</li></ul> <p data-svelte-h="svelte-1vplhg0">学習率のスケジュールには、前節までの標準的な線形なものを使うことにします。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> 
get_scheduler
num_train_epochs = <span class="hljs-number">10</span>
num_update_steps_per_epoch = <span class="hljs-built_in">len</span>(train_dataloader)
num_training_steps = num_train_epochs * num_update_steps_per_epoch
lr_scheduler = get_scheduler(
<span class="hljs-string">&quot;linear&quot;</span>,
optimizer=optimizer,
num_warmup_steps=<span class="hljs-number">0</span>,
num_training_steps=num_training_steps,
)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1fjvpg5">後処理として、生成された要約を改行で区切られた文に分割する関数が必要です。これはROUGE指標が期待する形式であり、次のようなコードの断片でこれを実現できます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">postprocess_text</span>(<span class="hljs-params">preds, labels</span>):
preds = [pred.strip() <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
labels = [label.strip() <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> labels]
<span class="hljs-comment"># ROUGE expects a newline after each sentence</span>
preds = [<span class="hljs-string">&quot;\n&quot;</span>.join(nltk.sent_tokenize(pred)) <span class="hljs-keyword">for</span> pred <span class="hljs-keyword">in</span> preds]
labels = [<span class="hljs-string">&quot;\n&quot;</span>.join(nltk.sent_tokenize(label)) <span class="hljs-keyword">for</span> label <span class="hljs-keyword">in</span> labels]
<span class="hljs-keyword">return</span> preds, labels<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1x69ayw">これは、 <code>Seq2SeqTrainer</code> の <code>compute_metrics()</code> 関数をどのように定義したかを思い出せば、見覚えがあるはずです。</p> <p data-svelte-h="svelte-io43kt">最後に、ハギングフェイス ハブにモデルリポジトリを作成する必要があります。これには、適切なタイトルの🤗 ハブ ライブラリを使用します。
私たちは、リポジトリの名前を定義する必要があるだけです。このライブラリには、リポジトリ ID とユーザプロファイルを組み合わせるユーティリティ関数があります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> get_full_repo_name
model_name = <span class="hljs-string">&quot;mt5-finetuned-amazon-en-es-accelerate&quot;</span>
repo_name = get_full_repo_name(model_name)
repo_name<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;lewtun/mt5-finetuned-amazon-en-es-accelerate&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-7pdgi">このリポジトリ名を使って、resultsディレクトリにローカルバージョンをクローンし、学習用成果物を格納します。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> Repository
output_dir = <span class="hljs-string">&quot;results-mt5-finetuned-squad-accelerate&quot;</span>
repo = Repository(output_dir, clone_from=repo_name)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-103hp75">これにより、トレーニング中に <code>repo.push_to_hub()</code> メソッドを呼び出すことで、成果物をハブにプッシュバックすることができます!
それでは、トレーニングループを書き出し、分析を終えましょう。</p> <h3 class="relative group"><a id="学習ループ" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#学習ループ"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>学習ループ</span></h3> <p data-svelte-h="svelte-mp3dud">要約のためのトレーニングループは、私たちが遭遇した他の🤗 Accelerateの例と非常によく似ており、大きく4つの主要なステップに分かれています。</p> <ol data-svelte-h="svelte-ski3me"><li>各エポックごとに <code>train_dataloader</code> にあるすべての例に対して繰り返し処理を行い、モデルを学習させる。</li> <li>各エポック終了時に、まずトークンを生成し、それをデコードしてテキストにすることでモデルの要約を生成する。(参考要約も)。</li> <li>先に見たのと同じ手法でROUGEスコアを計算する。</li> <li>チェックポイントを保存し、すべてをハブにプッシュする。ここでは、エポック毎にチェックポイントを <em>非同期</em> にプッシュできるように、<code>Repository</code> オブジェクトの <code>blocking=False</code> という便利な引数に頼っています。これにより、GBサイズのモデルで発生する遅いアップロードを待つことなく、学習を継続することができるようになりました。</li></ol> <p data-svelte-h="svelte-whdqdc">これらの手順は、以下のコードブロックのようになります。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" 
xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> tqdm.auto <span class="hljs-keyword">import</span> tqdm
<span class="hljs-keyword">import</span> torch
<span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np
progress_bar = tqdm(<span class="hljs-built_in">range</span>(num_training_steps))
<span class="hljs-keyword">for</span> epoch <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(num_train_epochs):
    <span class="hljs-comment"># Training</span>
    model.train()
    <span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(train_dataloader):
        outputs = model(**batch)
        loss = outputs.loss
        accelerator.backward(loss)
        optimizer.step()
        lr_scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(<span class="hljs-number">1</span>)
    <span class="hljs-comment"># Evaluation</span>
    model.<span class="hljs-built_in">eval</span>()
    <span class="hljs-keyword">for</span> step, batch <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(eval_dataloader):
        <span class="hljs-keyword">with</span> torch.no_grad():
            generated_tokens = accelerator.unwrap_model(model).generate(
                batch[<span class="hljs-string">&quot;input_ids&quot;</span>],
                attention_mask=batch[<span class="hljs-string">&quot;attention_mask&quot;</span>],
            )
            generated_tokens = accelerator.pad_across_processes(
                generated_tokens, dim=<span class="hljs-number">1</span>, pad_index=tokenizer.pad_token_id
            )
            labels = batch[<span class="hljs-string">&quot;labels&quot;</span>]
            <span class="hljs-comment"># If we did not pad to max length, we need to pad the labels too</span>
            labels = accelerator.pad_across_processes(
                batch[<span class="hljs-string">&quot;labels&quot;</span>], dim=<span class="hljs-number">1</span>, pad_index=tokenizer.pad_token_id
            )
            generated_tokens = accelerator.gather(generated_tokens).cpu().numpy()
            labels = accelerator.gather(labels).cpu().numpy()
            <span class="hljs-comment"># Replace -100 in the labels as we can&#x27;t decode them</span>
            labels = np.where(labels != -<span class="hljs-number">100</span>, labels, tokenizer.pad_token_id)
            <span class="hljs-keyword">if</span> <span class="hljs-built_in">isinstance</span>(generated_tokens, <span class="hljs-built_in">tuple</span>):
                generated_tokens = generated_tokens[<span class="hljs-number">0</span>]
            decoded_preds = tokenizer.batch_decode(
                generated_tokens, skip_special_tokens=<span class="hljs-literal">True</span>
            )
            decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=<span class="hljs-literal">True</span>)
            decoded_preds, decoded_labels = postprocess_text(
                decoded_preds, decoded_labels
            )
            rouge_score.add_batch(predictions=decoded_preds, references=decoded_labels)
    <span class="hljs-comment"># Compute metrics</span>
    result = rouge_score.compute()
    <span class="hljs-comment"># Extract the median ROUGE scores</span>
    result = {key: value.mid.fmeasure * <span class="hljs-number">100</span> <span class="hljs-keyword">for</span> key, value <span class="hljs-keyword">in</span> result.items()}
    result = {k: <span class="hljs-built_in">round</span>(v, <span class="hljs-number">4</span>) <span class="hljs-keyword">for</span> k, v <span class="hljs-keyword">in</span> result.items()}
    <span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;Epoch <span class="hljs-subst">{epoch}</span>:&quot;</span>, result)
    <span class="hljs-comment"># Save and upload</span>
    accelerator.wait_for_everyone()
    unwrapped_model = accelerator.unwrap_model(model)
    unwrapped_model.save_pretrained(output_dir, save_function=accelerator.save)
    <span class="hljs-keyword">if</span> accelerator.is_main_process:
        tokenizer.save_pretrained(output_dir)
        repo.push_to_hub(
            commit_message=<span class="hljs-string">f&quot;Training in progress epoch <span class="hljs-subst">{epoch}</span>&quot;</span>, blocking=<span class="hljs-literal">False</span>
)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->Epoch <span class="hljs-number">0</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">5.6351</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">1.1625</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">5.4866</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">5.5005</span>}
Epoch <span class="hljs-number">1</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">9.8646</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">3.4106</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">9.9439</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">9.9306</span>}
Epoch <span class="hljs-number">2</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">11.0872</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">3.3273</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">11.0508</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">10.9468</span>}
Epoch <span class="hljs-number">3</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">11.8587</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">4.8167</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">11.7986</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">11.7518</span>}
Epoch <span class="hljs-number">4</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">12.9842</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">5.5887</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">12.7546</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">12.7029</span>}
Epoch <span class="hljs-number">5</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.4628</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.4598</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.312</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.2913</span>}
Epoch <span class="hljs-number">6</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">12.9131</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">5.8914</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">12.6896</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">12.5701</span>}
Epoch <span class="hljs-number">7</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.3079</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.2994</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.1536</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.1194</span>}
Epoch <span class="hljs-number">8</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">13.96</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">6.5998</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">13.9123</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.7744</span>}
Epoch <span class="hljs-number">9</span>: {<span class="hljs-string">&#x27;rouge1&#x27;</span>: <span class="hljs-number">14.1192</span>, <span class="hljs-string">&#x27;rouge2&#x27;</span>: <span class="hljs-number">7.0059</span>, <span class="hljs-string">&#x27;rougeL&#x27;</span>: <span class="hljs-number">14.1172</span>, <span class="hljs-string">&#x27;rougeLsum&#x27;</span>: <span class="hljs-number">13.9509</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1iodlq">それで終わりです。これを実行すると、<code>Trainer</code>で得たものとよく似たモデルと結果が得られます。</p> <h2 class="relative group"><a id="あなたの微調整したモデルを使用する" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#あなたの微調整したモデルを使用する"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>あなたの微調整したモデルを使用する</span></h2> <p data-svelte-h="svelte-11fwu8z">モデルをハブにプッシュしたら、推論ウィジェットか <code>pipeline</code> オブジェクトを使って、次のように操作することができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" 
type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
hub_model_id = <span class="hljs-string">&quot;huggingface-course/mt5-small-finetuned-amazon-en-es&quot;</span>
summarizer = pipeline(<span class="hljs-string">&quot;summarization&quot;</span>, model=hub_model_id)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12p8wm6">要約の品質について感触を得るために、テストセット(モデルは見た事がない)からいくつかの例をパイプラインに送り込むことができます。最初に、レビュー、タイトル、生成された要約を一緒に表示する簡単な関数を実装してみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-keyword">def</span> <span class="hljs-title function_">print_summary</span>(<span class="hljs-params">idx</span>):
review = books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_body&quot;</span>]
title = books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_title&quot;</span>]
summary = summarizer(books_dataset[<span class="hljs-string">&quot;test&quot;</span>][idx][<span class="hljs-string">&quot;review_body&quot;</span>])[<span class="hljs-number">0</span>][<span class="hljs-string">&quot;summary_text&quot;</span>]
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;&#x27;&gt;&gt;&gt; Review: <span class="hljs-subst">{review}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt;&gt; Title: <span class="hljs-subst">{title}</span>&#x27;&quot;</span>)
<span class="hljs-built_in">print</span>(<span class="hljs-string">f&quot;\n&#x27;&gt;&gt;&gt; Summary: <span class="hljs-subst">{summary}</span>&#x27;&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1vzhzt4">英語の例を一つ見てみましょう。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->print_summary(<span class="hljs-number">100</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" 
preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt;&gt; Review: Nothing special at all about this product... the book is too small and stiff and hard to write in. The huge sticker on the back doesn’t come off and looks super tacky. I would not purchase this again. I could have just bought a journal from the dollar store and it would be basically the same thing. It’s also really expensive for what it is.&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Title: Not impressed at all... buy something else&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Summary: Nothing special at all about this product&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-17yppx6">これは悪くありません! 私たちのモデルは実際に新しい単語でレビューの一部を補強することによって、抽象的な要約を行うことができたことがわかります。また、私たちのモデルの最もクールな点は、バイリンガルであることです。したがって、スペイン語のレビューの要約も生成することができます。</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->print_summary(<span class="hljs-number">0</span>)<!-- HTML_TAG_END --></pre></div> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" 
focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-string">&#x27;&gt;&gt;&gt; Review: Es una trilogia que se hace muy facil de leer. Me ha gustado, no me esperaba el final para nada&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Title: Buena literatura para adolescentes&#x27;</span>
<span class="hljs-string">&#x27;&gt;&gt;&gt; Summary: Muy facil de leer&#x27;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-2tmqnj">要約は英語で「Very easy to read」と訳され、この要約の場合はレビューの文中から直接抽出されたことが分かります。しかし、これはmT5モデルの多用途性を示しており、多言語コーパスを扱うことがどのようなものかを体験していただけたと思います。</p> <p data-svelte-h="svelte-gxwd20">次に、もう少し複雑なタスクである、ゼロから言語モデルを学習させる事に目を向けます。</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/course/blob/main/chapters/ja/chapter7/5.mdx" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
// Client bootstrap for this page: publish the path configuration on a
// global object, then load the two entry modules and start the app.
// The name is assigned without let/const/var in this block.
__sveltekit_1tghmjc = {
assets: "/docs/course/pr_1069/ja",
base: "/docs/course/pr_1069/ja",
env: {}
};
// Parent element of the currently executing script element; passed to
// kit.start() below. It is captured here, synchronously, because
// document.currentScript is only set while the script itself is running
// and would no longer be available inside the .then() callback.
const element = document.currentScript.parentElement;
// Two null entries, forwarded unchanged to kit.start() as `data`.
// NOTE(review): presumably one entry per id in node_ids; confirm.
const data = [null,null];
// Request both entry modules in parallel; the app starts only after both
// imports have resolved. No rejection handler is attached here.
Promise.all([
import("/docs/course/pr_1069/ja/_app/immutable/entry/start.e837503d.js"),
import("/docs/course/pr_1069/ja/_app/immutable/entry/app.2ea0d654.js")
]).then(([kit, app]) => {
// Hand the app module, the mount element and the start options to
// the start module.
kit.start(app, element, {
node_ids: [0, 28],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
210 kB
·
Xet hash:
5b3e372f5d4bd40f787cec00a6347e15e13cc43e6f3cd6409389f539f041cd13

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.