Buckets:
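| <meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;تصنيف الرموز(Token classification)&quot;,&quot;local&quot;:&quot;تصنيف-الرموزtoken-classification&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;تحميل مجموعة بيانات WNUT 17&quot;,&quot;local&quot;:&quot;تحميل-مجموعة-بيانات-wnut-17&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;المعالجة المسبقة(Preprocess)&quot;,&quot;local&quot;:&quot;المعالجة-المسبقةpreprocess&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;التقييم(Evaluate)&quot;,&quot;local&quot;:&quot;التقييمevaluate&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;التدريب(Train)&quot;,&quot;local&quot;:&quot;التدريبtrain&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;الاستدلال(Inference)&quot;,&quot;local&quot;:&quot;الاستدلالinference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"> | |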
| <link href="/docs/transformers/main/ar/_app/immutable/assets/0.e3b0c442.css" rel="stylesheet"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/entry/start.01f9ab35.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/scheduler.eaf6c8c4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/singletons.f7e976e0.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/index.5e3cad04.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/paths.160a0ac7.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/entry/app.dc433586.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/preload-helper.d648f8ef.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/index.e25dcc83.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/nodes/0.0571600e.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/each.e59479a4.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/nodes/43.2403bb67.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/Tip.32737511.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/MermaidChart.svelte_svelte_type_style_lang.c5abd470.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/Youtube.7df060fd.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/CodeBlock.d3edfc80.js"> | |
| <link rel="modulepreload" href="/docs/transformers/main/ar/_app/immutable/chunks/DocNotebookDropdown.cd339010.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{"title":"تصنيف الرموز(Token classification)","local":"تصنيف-الرموزtoken-classification","sections":[{"title":"تحميل مجموعة بيانات WNUT 17","local":"تحميل-مجموعة-بيانات-wnut-17","sections":[],"depth":2},{"title":"المعالجة المسبقة(Preprocess)","local":"المعالجة-المسبقةpreprocess","sections":[],"depth":2},{"title":"التقييم(Evaluate)","local":"التقييمevaluate","sections":[],"depth":2},{"title":"التدريب(Train)","local":"التدريبtrain","sections":[],"depth":2},{"title":"الاستدلال(Inference)","local":"الاستدلالinference","sections":[],"depth":2}],"depth":1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <div class="items-center shrink-0 min-w-[100px] max-sm:min-w-[50px] justify-end ml-auto flex" style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"><div class="inline-flex rounded-md max-sm:rounded-sm"><button class="inline-flex items-center gap-1 h-7 max-sm:h-7 px-2 max-sm:px-1.5 text-sm font-medium text-gray-800 border border-r-0 rounded-l-md max-sm:rounded-l-sm border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-live="polite"><span class="inline-flex items-center justify-center rounded-md p-0.5 max-sm:p-0 hover:text-gray-800 dark:hover:text-gray-200"><svg class="sm:size-3.5 size-3" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg></span> <span>Copy page</span></button> <button 
class="inline-flex items-center justify-center w-6 max-sm:w-5 h-7 max-sm:h-7 disabled:pointer-events-none text-sm text-gray-500 hover:text-gray-700 dark:hover:text-white rounded-r-md max-sm:rounded-r-sm border border-l transition border-gray-200 bg-white hover:shadow-inner dark:border-gray-850 dark:bg-gray-950 dark:text-gray-200 dark:hover:bg-gray-800" aria-haspopup="menu" aria-expanded="false" aria-label="Open copy menu"><svg class="transition-transform text-gray-400 overflow-visible sm:size-3.5 size-3 rotate-0" width="1em" height="1em" viewBox="0 0 12 7" fill="none" xmlns="http://www.w3.org/2000/svg"><path d="M1 1L6 6L11 1" stroke="currentColor"></path></svg></button></div> </div> <div class="flex space-x-1 " style="float: right; margin-left: 10px; display: inline-flex; position: relative; z-index: 10;"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <h1 class="relative group"><a id="تصنيف-الرموزtoken-classification" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#تصنيف-الرموزtoken-classification"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 
0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>تصنيف الرموز(Token classification)</span></h1> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/wVHdVlPScxA" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-mlex5h">يهدف تصنيف الرموز إلى إعطاء تسمية لكل رمز على حدة في الجملة. من أكثر مهام تصنيف الرموز شيوعًا هو التعرف على الكيانات المسماة (NER). يحاول NER تحديد تسمية لكل كيان في الجملة، مثل شخص، أو مكان، أو منظمة.</p> <p data-svelte-h="svelte-lp8700">سيوضح لك هذا الدليل كيفية:</p> <ol data-svelte-h="svelte-f6bdfy"><li>ضبط <a href="https://huggingface.co/distilbert/distilbert-base-uncased" rel="nofollow">DistilBERT</a> على مجموعة بيانات <a href="https://huggingface.co/datasets/wnut_17" rel="nofollow">WNUT 17</a> للكشف عن كيانات جديدة.</li> <li>استخدام نموذجك المضبوط بدقة للاستدلال.</li></ol> <blockquote class="tip"><p data-svelte-h="svelte-1pa6gj1">للاطلاع على جميع البنى ونقاط التحقق المتوافقة مع هذه المهمة، نوصي بالرجوع إلى <a href="https://huggingface.co/tasks/token-classification" rel="nofollow">صفحة المهمة</a>.</p></blockquote> <p data-svelte-h="svelte-93upt2">قبل أن تبدأ، تأكد من تثبيت جميع المكتبات الضرورية:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" 
viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-bash "><!-- HTML_TAG_START -->pip install transformers datasets evaluate seqeval<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1akjhwh">نحن نشجعك على تسجيل الدخول إلى حساب HuggingFace الخاص بك حتى تتمكن من تحميل ومشاركة نموذجك مع المجتمع. عندما يُطلب منك، أدخل رمزك لتسجيل الدخول:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute 
bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login | |
| <span class="hljs-meta">>>> </span>notebook_login()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="تحميل-مجموعة-بيانات-wnut-17" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#تحميل-مجموعة-بيانات-wnut-17"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>تحميل مجموعة بيانات WNUT 17</span></h2> <p data-svelte-h="svelte-1tafdxt">ابدأ بتحميل مجموعة بيانات WNUT 17 من مكتبة 🤗 Datasets:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" 
height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset | |
| <span class="hljs-meta">>>> </span>wnut = load_dataset(<span class="hljs-string">"wnut_17"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1e1soci">ثم ألق نظرة على مثال:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>wnut[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>] | |
| {<span class="hljs-string">'id'</span>: <span class="hljs-string">'0'</span>, | |
| <span class="hljs-string">'ner_tags'</span>: [<span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">8</span>, <span class="hljs-number">8</span>, <span class="hljs-number">0</span>, <span class="hljs-number">7</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>], | |
| <span class="hljs-string">'tokens'</span>: [<span class="hljs-string">'@paulwalk'</span>, <span class="hljs-string">'It'</span>, <span class="hljs-string">"'s"</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'view'</span>, <span class="hljs-string">'from'</span>, <span class="hljs-string">'where'</span>, <span class="hljs-string">'I'</span>, <span class="hljs-string">"'m"</span>, <span class="hljs-string">'living'</span>, <span class="hljs-string">'for'</span>, <span class="hljs-string">'two'</span>, <span class="hljs-string">'weeks'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'Empire'</span>, <span class="hljs-string">'State'</span>, <span class="hljs-string">'Building'</span>, <span class="hljs-string">'='</span>, <span class="hljs-string">'ESB'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'Pretty'</span>, <span class="hljs-string">'bad'</span>, <span class="hljs-string">'storm'</span>, <span class="hljs-string">'here'</span>, <span class="hljs-string">'last'</span>, <span class="hljs-string">'evening'</span>, <span class="hljs-string">'.'</span>] | |
| }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-krnhtx">يمثل كل رقم في <code>ner_tags</code> كياناً. حوّل الأرقام إلى أسماء التصنيفات لمعرفة ماهية الكيانات:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>label_list = wnut[<span class="hljs-string">"train"</span>].features[<span class="hljs-string">f"ner_tags"</span>].feature.names | |
| <span class="hljs-meta">>>> </span>label_list | |
| [ | |
| <span class="hljs-string">"O"</span>, | |
| <span class="hljs-string">"B-corporation"</span>, | |
| <span class="hljs-string">"I-corporation"</span>, | |
| <span class="hljs-string">"B-creative-work"</span>, | |
| <span class="hljs-string">"I-creative-work"</span>, | |
| <span class="hljs-string">"B-group"</span>, | |
| <span class="hljs-string">"I-group"</span>, | |
| <span class="hljs-string">"B-location"</span>, | |
| <span class="hljs-string">"I-location"</span>, | |
| <span class="hljs-string">"B-person"</span>, | |
| <span class="hljs-string">"I-person"</span>, | |
| <span class="hljs-string">"B-product"</span>, | |
| <span class="hljs-string">"I-product"</span>, | |
| ]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-adxshk">يشير الحرف الذي يسبق كل <code>ner_tag</code> إلى موضع الرمز للكيان:</p> <ul data-svelte-h="svelte-himjeb"><li><code>B-</code> يشير إلى بداية الكيان.</li> <li><code>I-</code> يشير إلى أن الرمز يقع ضمن نفس الكيان (على سبيل المثال، الرمز <code>State</code> هو جزء من كيان مثل <code>Empire State Building</code>).</li> <li><code>O</code> يشير إلى أن الرمز لا يمثل أي كيان.</li></ul> <h2 class="relative group"><a id="المعالجة-المسبقةpreprocess" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#المعالجة-المسبقةpreprocess"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>المعالجة المسبقة(Preprocess)</span></h2> <iframe class="w-full xl:w-4/6 h-80" src="https://www.youtube-nocookie.com/embed/iY2AZYdZAr0" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe> <p data-svelte-h="svelte-24obir">الخطوة التالية هي تحميل مُجزِّئ النصوص DistilBERT للمعالجة المسبقة لحقل <code>tokens</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 
right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"distilbert/distilbert-base-uncased"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1ice9v2">كما رأيت في حقل <code>tokens</code> المثال أعلاه، يبدو أن المدخل قد تم تحليله بالفعل. لكن المدخل لم يُجزأ بعد ويتعيّن عليك ضبط <code>is_split_into_words=True</code> لتقسيم الكلمات إلى كلمات فرعية. على سبيل المثال:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>example = wnut[<span class="hljs-string">"train"</span>][<span class="hljs-number">0</span>] | |
| <span class="hljs-meta">>>> </span>tokenized_input = tokenizer(example[<span class="hljs-string">"tokens"</span>], is_split_into_words=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">>>> </span>tokens = tokenizer.convert_ids_to_tokens(tokenized_input[<span class="hljs-string">"input_ids"</span>]) | |
| <span class="hljs-meta">>>> </span>tokens | |
| [<span class="hljs-string">'[CLS]'</span>, <span class="hljs-string">'@'</span>, <span class="hljs-string">'paul'</span>, <span class="hljs-string">'##walk'</span>, <span class="hljs-string">'it'</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">'s'</span>, <span class="hljs-string">'the'</span>, <span class="hljs-string">'view'</span>, <span class="hljs-string">'from'</span>, <span class="hljs-string">'where'</span>, <span class="hljs-string">'i'</span>, <span class="hljs-string">"'"</span>, <span class="hljs-string">'m'</span>, <span class="hljs-string">'living'</span>, <span class="hljs-string">'for'</span>, <span class="hljs-string">'two'</span>, <span class="hljs-string">'weeks'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'empire'</span>, <span class="hljs-string">'state'</span>, <span class="hljs-string">'building'</span>, <span class="hljs-string">'='</span>, <span class="hljs-string">'es'</span>, <span class="hljs-string">'##b'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'pretty'</span>, <span class="hljs-string">'bad'</span>, <span class="hljs-string">'storm'</span>, <span class="hljs-string">'here'</span>, <span class="hljs-string">'last'</span>, <span class="hljs-string">'evening'</span>, <span class="hljs-string">'.'</span>, <span class="hljs-string">'[SEP]'</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-n6w5d2">ومع ذلك، يضيف هذا بعض الرموز الخاصة <code>[CLS]</code> و<code>[SEP]</code> وتقسيم الكلمات إلى أجزاء يُنشئ عدم تطابق بين المُدخلات والتسميات. قد يتم تقسيم كلمة واحدة تقابل تسمية واحدة الآن إلى كلمتين فرعيتين. 
ستحتاج إلى إعادة محاذاة الرموز والتسميات عن طريق:</p> <ol data-svelte-h="svelte-e69f3s"><li>ربط كل رمز بالكلمة الأصلية باستخدام الخاصية <a href="https://huggingface.co/docs/transformers/main_classes/tokenizer#transformers.BatchEncoding.word_ids" rel="nofollow"><code>word_ids</code></a>.</li> <li>تعيين التسمية <code>-100</code> للرموز الخاصة <code>[CLS]</code> و<code>[SEP]</code> بحيث يتم تجاهلها بواسطة دالة الخسارة PyTorch (انظر <a href="https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html" rel="nofollow">CrossEntropyLoss</a>).</li> <li>تسمية الرمز الأول فقط لكلمة معينة. قم بتعيين <code>-100</code> لأجزاء الكلمة الأخرى.</li></ol> <p data-svelte-h="svelte-58h08z">هنا كيف يمكنك إنشاء وظيفة لإعادة محاذاة الرموز والتسميات، وقص الجمل بحيث لا تتجاوز الحد الأقصى لطول مُدخلات DistilBERT:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre 
class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">tokenize_and_align_labels</span>(<span class="hljs-params">examples</span>): | |
| <span class="hljs-meta">... </span> tokenized_inputs = tokenizer(examples[<span class="hljs-string">"tokens"</span>], truncation=<span class="hljs-literal">True</span>, is_split_into_words=<span class="hljs-literal">True</span>) | |
| <span class="hljs-meta">... </span> labels = [] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(examples[<span class="hljs-string">f"ner_tags"</span>]): | |
| <span class="hljs-meta">... </span> word_ids = tokenized_inputs.word_ids(batch_index=i) <span class="hljs-comment"># تعيين الرموز إلى كلماتهم المقابلة.</span> | |
| <span class="hljs-meta">... </span> previous_word_idx = <span class="hljs-literal">None</span> | |
| <span class="hljs-meta">... </span> label_ids = [] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> word_idx <span class="hljs-keyword">in</span> word_ids: <span class="hljs-comment"># تعيين الرموز الخاصة إلى -100.</span> | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> word_idx <span class="hljs-keyword">is</span> <span class="hljs-literal">None</span>: | |
| <span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">elif</span> word_idx != previous_word_idx: <span class="hljs-comment"># تسمية الرمز الأول فقط لكلمة معينة.</span> | |
| <span class="hljs-meta">... </span> label_ids.append(label[word_idx]) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>: | |
| <span class="hljs-meta">... </span> label_ids.append(-<span class="hljs-number">100</span>) | |
| <span class="hljs-meta">... </span> previous_word_idx = word_idx | |
| <span class="hljs-meta">... </span> labels.append(label_ids) | |
| <span class="hljs-meta">... </span> tokenized_inputs[<span class="hljs-string">"labels"</span>] = labels | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> tokenized_inputs<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-k7o1mk">لتطبيق هذه العملية على كامل مجموعة البيانات، استخدم الدالة <code>map</code> لمجموعة بيانات 🤗. يمكنك تسريع الدالة <code>map</code> عن طريق تعيين <code>batched=True</code> لمعالجة عناصر متعددة من مجموعة البيانات في وقت واحد:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>tokenized_wnut = wnut.<span class="hljs-built_in">map</span>(tokenize_and_align_labels, batched=<span class="hljs-literal">True</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1l905x3">الآن قم بإنشاء دفعة من الأمثلة باستخدام <code>DataCollatorForTokenClassification</code>. من الأفضل استخدام <em>الحشو الديناميكي</em> للجمل إلى أطول طول في دفعة أثناء 
التجميع، بدلاً من حشو مجموعة البيانات بالكامل إلى الطول الأقصى.</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DataCollatorForTokenClassification | |
| <span class="hljs-meta">>>> </span>data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="التقييمevaluate" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#التقييمevaluate"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>التقييم(Evaluate)</span></h2> <p data-svelte-h="svelte-12xv8go">يُعدّ تضمين مقياس أثناء التدريب مفيدًا في تقييم أداء نموذجك. يمكنك تحميل طريقة تقييم بسرعة مع مكتبة 🤗 <a href="https://huggingface.co/docs/evaluate/index" rel="nofollow">Evaluate</a>. لهذه المهمة، قم بتحميل إطار <a href="https://huggingface.co/spaces/evaluate-metric/seqeval" rel="nofollow">seqeval</a> (انظر جولة 🤗 Evaluate <a href="https://huggingface.co/docs/evaluate/a_quick_tour" rel="nofollow">quick tour</a> لمعرفة المزيد حول كيفية تحميل وحساب مقياس). 
يُخرج seqeval عدة نتائج: الدقة (precision)، والاستذكار (recall)، ومقياس F1، والصحة (accuracy).</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> evaluate | |
| <span class="hljs-meta">>>> </span>seqeval = evaluate.load(<span class="hljs-string">"seqeval"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10jvy45">احصل على تسميات الكيانات المسماة (NER) أولاً، ثم أنشئ دالة تُمرر تنبؤاتك وتسمياتك الصحيحة إلى <code>compute</code> لحساب النتائج:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> numpy <span class="hljs-keyword">as</span> np | |
| <span class="hljs-meta">>>> </span>labels = [label_list[i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> example[<span class="hljs-string">f"ner_tags"</span>]] | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">compute_metrics</span>(<span class="hljs-params">p</span>): | |
| <span class="hljs-meta">... </span> predictions, labels = p | |
| <span class="hljs-meta">... </span> predictions = np.argmax(predictions, axis=<span class="hljs-number">2</span>) | |
| <span class="hljs-meta">... </span> true_predictions = [ | |
| <span class="hljs-meta">... </span> [label_list[p] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels) | |
| <span class="hljs-meta">... </span> ] | |
| <span class="hljs-meta">... </span> true_labels = [ | |
| <span class="hljs-meta">... </span> [label_list[l] <span class="hljs-keyword">for</span> (p, l) <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(prediction, label) <span class="hljs-keyword">if</span> l != -<span class="hljs-number">100</span>] | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> prediction, label <span class="hljs-keyword">in</span> <span class="hljs-built_in">zip</span>(predictions, labels) | |
| <span class="hljs-meta">... </span> ] | |
| <span class="hljs-meta">... </span> results = seqeval.compute(predictions=true_predictions, references=true_labels) | |
| <span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"precision"</span>: results[<span class="hljs-string">"overall_precision"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"recall"</span>: results[<span class="hljs-string">"overall_recall"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"f1"</span>: results[<span class="hljs-string">"overall_f1"</span>], | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"accuracy"</span>: results[<span class="hljs-string">"overall_accuracy"</span>], | |
| <span class="hljs-meta">... </span> }<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-t0jeyd">دالة <code>compute_metrics</code> جاهزة للاستخدام، وستحتاج إليها عند إعداد التدريب.</p> <h2 class="relative group"><a id="التدريبtrain" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#التدريبtrain"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>التدريب(Train)</span></h2> <p data-svelte-h="svelte-1fc001q">قبل تدريب النموذج، جهّز خريطة تربط بين المعرّفات المتوقعة وتسمياتها باستخدام <code>id2label</code> و <code>label2id</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" 
transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>id2label = { | |
| <span class="hljs-meta">... </span> <span class="hljs-number">0</span>: <span class="hljs-string">"O"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">1</span>: <span class="hljs-string">"B-corporation"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">2</span>: <span class="hljs-string">"I-corporation"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">3</span>: <span class="hljs-string">"B-creative-work"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">4</span>: <span class="hljs-string">"I-creative-work"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">5</span>: <span class="hljs-string">"B-group"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">6</span>: <span class="hljs-string">"I-group"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">7</span>: <span class="hljs-string">"B-location"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">8</span>: <span class="hljs-string">"I-location"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">9</span>: <span class="hljs-string">"B-person"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">10</span>: <span class="hljs-string">"I-person"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">11</span>: <span class="hljs-string">"B-product"</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-number">12</span>: <span class="hljs-string">"I-product"</span>, | |
| <span class="hljs-meta">... </span>} | |
| <span class="hljs-meta">>>> </span>label2id = { | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"O"</span>: <span class="hljs-number">0</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-corporation"</span>: <span class="hljs-number">1</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-corporation"</span>: <span class="hljs-number">2</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-creative-work"</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-creative-work"</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-group"</span>: <span class="hljs-number">5</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-group"</span>: <span class="hljs-number">6</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-location"</span>: <span class="hljs-number">7</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-location"</span>: <span class="hljs-number">8</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-person"</span>: <span class="hljs-number">9</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-person"</span>: <span class="hljs-number">10</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"B-product"</span>: <span class="hljs-number">11</span>, | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"I-product"</span>: <span class="hljs-number">12</span>, | |
| <span class="hljs-meta">... </span>}<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-182x1vq">إذا لم تكن على دراية بتعديل نموذج باستخدام <code>Trainer</code>، ألق نظرة على الدليل التعليمي الأساسي <a href="../training#train-with-pytorch-trainer">هنا</a>!</p></blockquote> <p data-svelte-h="svelte-1q5em0k">أنت مستعد الآن لبدء تدريب نموذجك! قم بتحميل DistilBERT مع <code>AutoModelForTokenClassification</code> إلى جانب عدد التصنيفات المتوقعة، وخريطة التسميات:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification, TrainingArguments, Trainer | |
| <span class="hljs-meta">>>> </span>model = AutoModelForTokenClassification.from_pretrained( | |
| <span class="hljs-meta">... </span> <span class="hljs-string">"distilbert/distilbert-base-uncased"</span>, num_labels=<span class="hljs-number">13</span>, id2label=id2label, label2id=label2id | |
| <span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-js7en6">في هذه المرحلة، هناك ثلاث خطوات فقط متبقية:</p> <ol data-svelte-h="svelte-xrljiu"><li>حدد معلمات التدريب الخاصة بك في <code>TrainingArguments</code>. المعامل الوحيد المطلوب هو <code>output_dir</code> الذي يحدد مكان حفظ نموذجك. ستقوم بدفع هذا النموذج إلى Hub عن طريق تعيين <code>push_to_hub=True</code> (يجب أن تكون مسجلاً الدخول إلى Hugging Face لتحميل نموذجك). في نهاية كل حقبة، سيقوم <code>Trainer</code> بتقييم درجات seqeval وحفظ نسخة التدريب.</li> <li>قم بتمرير معاملات التدريب إلى <code>Trainer</code> إلى جانب النموذج، ومجموعة البيانات، والمُجزِّئ اللغوي، و<code>data collator</code>، ودالة <code>compute_metrics</code>.</li> | |
| <li>استدعِ <code>train()</code> لتدريب نموذجك.</li></ol> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>training_args = TrainingArguments( | |
| <span class="hljs-meta">... </span> output_dir=<span class="hljs-string">"my_awesome_wnut_model"</span>, | |
| <span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">2e-5</span>, | |
| <span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> per_device_eval_batch_size=<span class="hljs-number">16</span>, | |
| <span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">2</span>, | |
| <span class="hljs-meta">... </span> weight_decay=<span class="hljs-number">0.01</span>, | |
| <span class="hljs-meta">... </span> eval_strategy=<span class="hljs-string">"epoch"</span>, | |
| <span class="hljs-meta">... </span> save_strategy=<span class="hljs-string">"epoch"</span>, | |
| <span class="hljs-meta">... </span> load_best_model_at_end=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer = Trainer( | |
| <span class="hljs-meta">... </span> model=model, | |
| <span class="hljs-meta">... </span> args=training_args, | |
| <span class="hljs-meta">... </span> train_dataset=tokenized_wnut[<span class="hljs-string">"train"</span>], | |
| <span class="hljs-meta">... </span> eval_dataset=tokenized_wnut[<span class="hljs-string">"test"</span>], | |
| <span class="hljs-meta">... </span> processing_class=tokenizer, | |
| <span class="hljs-meta">... </span> data_collator=data_collator, | |
| <span class="hljs-meta">... </span> compute_metrics=compute_metrics, | |
| <span class="hljs-meta">... </span>) | |
| <span class="hljs-meta">>>> </span>trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-ff6r7l">بمجرد اكتمال التدريب، شارك نموذجك على Hub باستخدام طريقة <code>push_to_hub()</code> حتى يتمكن الجميع من استخدام نموذجك:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <blockquote class="tip"><p data-svelte-h="svelte-1xy2vh1">للحصول على مثال أكثر تفصيلاً حول كيفية تعديل نموذج لتصنيف الرموز، ألق نظرة على الدفتر المقابل | |
| <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification.ipynb" rel="nofollow">دفتر PyTorch</a> | |
| أو <a href="https://colab.research.google.com/github/huggingface/notebooks/blob/main/examples/token_classification-tf.ipynb" rel="nofollow">دفتر TensorFlow</a>.</p></blockquote> <h2 class="relative group"><a id="الاستدلالinference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#الاستدلالinference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>الاستدلال(Inference)</span></h2> <p data-svelte-h="svelte-ltiwph">رائع، الآن بعد أن قمت بتعديل نموذج، يمكنك استخدامه للاستدلال!</p> <p data-svelte-h="svelte-1gaxkwa">احصل على بعض النصوص التي تريد تشغيل الاستدلال عليها:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path 
d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>text = <span class="hljs-string">"The Golden State Warriors are an American professional basketball team based in San Francisco."</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-6lvpup">أبسط طريقة لتجربة نموذجك المُعدَّل للاستدلال هي استخدامه في <code>pipeline()</code>. 
قم بتنفيذ <code>pipeline</code> لتصنيف الكيانات المسماة مع نموذجك، ومرر نصك إليه:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline | |
| <span class="hljs-meta">>>> </span>classifier = pipeline(<span class="hljs-string">"ner"</span>, model=<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span>classifier(text) | |
| [{<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.42658573</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">2</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'golden'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">10</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'I-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.35856336</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">3</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'state'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">11</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">16</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-group'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.3064001</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">4</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'warriors'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">17</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">25</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.65523505</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">13</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'san'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">80</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">83</span>}, | |
| {<span class="hljs-string">'entity'</span>: <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'score'</span>: <span class="hljs-number">0.4668663</span>, | |
| <span class="hljs-string">'index'</span>: <span class="hljs-number">14</span>, | |
| <span class="hljs-string">'word'</span>: <span class="hljs-string">'francisco'</span>, | |
| <span class="hljs-string">'start'</span>: <span class="hljs-number">84</span>, | |
| <span class="hljs-string">'end'</span>: <span class="hljs-number">93</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-asb9jo">يمكنك أيضًا تكرار نتائج <code>pipeline</code> يدويًا إذا أردت:</p> <p data-svelte-h="svelte-9bzhm2">قسّم النص إلى رموز وأرجع المُوتّرات بلغة PyTorch:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer | |
| <span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span>inputs = tokenizer(text, return_tensors=<span class="hljs-string">"pt"</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-bshp75">مرر مدخلاتك إلى النموذج واحصل على <code>logits</code>:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForTokenClassification | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">import</span> torch | |
| <span class="hljs-meta">>>> </span>model = AutoModelForTokenClassification.from_pretrained(<span class="hljs-string">"stevhliu/my_awesome_wnut_model"</span>) | |
| <span class="hljs-meta">>>> </span><span class="hljs-keyword">with</span> torch.no_grad(): | |
| <span class="hljs-meta">... </span> logits = model(**inputs).logits<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-q2tg1i">استخرج الفئة ذات الاحتمالية الأعلى، واستخدم جدول <code>id2label</code> الخاصة بالنموذج لتحويلها إلى تسمية نصية:</p> <div class="code-block relative "><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class="language-py "><!-- HTML_TAG_START --><span class="hljs-meta">>>> </span>predictions = torch.argmax(logits, dim=<span class="hljs-number">2</span>) | |
| <span class="hljs-meta">>>> </span>predicted_token_class = [model.config.id2label[t.item()] <span class="hljs-keyword">for</span> t <span class="hljs-keyword">in</span> predictions[<span class="hljs-number">0</span>]] | |
| <span class="hljs-meta">>>> </span>predicted_token_class | |
| [<span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'I-location'</span>, | |
| <span class="hljs-string">'B-group'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'B-location'</span>, | |
| <span class="hljs-string">'O'</span>, | |
| <span class="hljs-string">'O'</span>]<!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ar/tasks/token_classification.md" target="_blank"><svg class="mr-1" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M31,16l-7,7l-1.41-1.41L28.17,16l-5.58-5.59L24,9l7,7z"></path><path d="M1,16l7-7l1.41,1.41L3.83,16l5.58,5.59L8,23l-7-7z"></path><path d="M12.419,25.484L17.639,6.552l1.932,0.518L14.351,26.002z"></path></svg> <span data-svelte-h="svelte-zjs2n5"><span class="underline">Update</span> on GitHub</span></a> <p></p> | |
| <script> | |
| { /* Client bootstrap: publishes the app's path configuration, then loads the two entry modules and starts the app on the element that contains this script. */ | |
| __sveltekit_1266ta = { /* assigned without a declaration keyword, so in a classic (non-strict) script this becomes a global; presumably read by the imported runtime — confirm */ | |
| assets: "/docs/transformers/main/ar", | |
| base: "/docs/transformers/main/ar", /* same path prefix as the import() URLs below */ | |
| env: {} | |
| }; | |
| const element = document.currentScript.parentElement; /* read synchronously: document.currentScript is only set while this script is executing, not inside the later .then() callback */ | |
| const data = [null,null]; /* two entries, presumably one per id in node_ids below — TODO confirm */ | |
| Promise.all([ /* request both entry modules in parallel; the same two URLs are listed as modulepreload links in the page head */ | |
| import("/docs/transformers/main/ar/_app/immutable/entry/start.01f9ab35.js"), | |
| import("/docs/transformers/main/ar/_app/immutable/entry/app.dc433586.js") | |
| ]).then(([kit, app]) => { /* destructured in the same order as the import() calls above: start module first, app module second */ | |
| kit.start(app, element, { /* passes the app module, the mount element and the initial state object to the start module's start() */ | |
| node_ids: [0, 43], /* NOTE(review): these appear to match the nodes/0.*.js and nodes/43.*.js chunks preloaded in the head — confirm */ | |
| data, | |
| form: null, | |
| error: null | |
| }); | |
| }); | |
| } | |
| </script> | |
Xet Storage Details
- Size:
- 76.3 kB
- Xet hash:
- 7a7892de736d2c31d000461969691beed6b8dd49debc181fdff43bfcb3216cdf
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.