Buckets:

rtrm's picture
download
raw
41 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;النماذج متعددة اللغات للاستدلال&quot;,&quot;local&quot;:&quot;النماذج-متعددة-اللغات-للاستدلال&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;XLM&quot;,&quot;local&quot;:&quot;xlm&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;XLM مع تضمينات اللغة&quot;,&quot;local&quot;:&quot;xlm-مع-تضمينات-اللغة&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;XLM بدون تضمينات اللغة&quot;,&quot;local&quot;:&quot;xlm-بدون-تضمينات-اللغة&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;BERT&quot;,&quot;local&quot;:&quot;bert&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;XLM-RoBERTa&quot;,&quot;local&quot;:&quot;xlm-roberta&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;M2M100&quot;,&quot;local&quot;:&quot;m2m100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;MBart&quot;,&quot;local&quot;:&quot;mbart&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/pr_33913/ar/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/entry/start.a2b1508c.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/scheduler.5eb9d175.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/singletons.995fe7fe.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/index.4d790b85.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/paths.2c5f54bd.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/entry/app.72eb63b7.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/index.fcdcb606.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/nodes/0.6aa6e804.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/nodes/21.7f4b35df.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/CodeBlock.a7036e06.js">
<link rel="modulepreload" href="/docs/transformers/pr_33913/ar/_app/immutable/chunks/EditOnGithub.98bf070f.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;النماذج متعددة اللغات للاستدلال&quot;,&quot;local&quot;:&quot;النماذج-متعددة-اللغات-للاستدلال&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;XLM&quot;,&quot;local&quot;:&quot;xlm&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;XLM مع تضمينات اللغة&quot;,&quot;local&quot;:&quot;xlm-مع-تضمينات-اللغة&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;XLM بدون تضمينات اللغة&quot;,&quot;local&quot;:&quot;xlm-بدون-تضمينات-اللغة&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;BERT&quot;,&quot;local&quot;:&quot;bert&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;XLM-RoBERTa&quot;,&quot;local&quot;:&quot;xlm-roberta&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;M2M100&quot;,&quot;local&quot;:&quot;m2m100&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;MBart&quot;,&quot;local&quot;:&quot;mbart&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="النماذج-متعددة-اللغات-للاستدلال" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#النماذج-متعددة-اللغات-للاستدلال"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>النماذج متعددة اللغات للاستدلال</span></h1> <p data-svelte-h="svelte-1avx6c4">هناك العديد من النماذج متعددة اللغات في مكتبة 🤗 Transformers، وتختلف طريقة استخدامها للاستدلال عن النماذج أحادية اللغة. ولكن ليس كل استخدام النماذج متعددة اللغات مختلف. فبعض النماذج، مثل <a href="https://huggingface.co/google-bert/bert-base-multilingual-uncased" rel="nofollow">google-bert/bert-base-multilingual-uncased</a>، يمكن استخدامها تمامًا مثل النموذج أحادي اللغة. سيوضح لك هذا الدليل كيفية استخدام النماذج متعددة اللغات التي تختلف طريقة استخدامها للاستدلال.</p> <h2 class="relative group"><a id="xlm" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xlm"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>XLM</span></h2> <p data-svelte-h="svelte-16c5ea1">يحتوي XLM على عشر نسخ مختلفة، واحدة منها فقط أحادية اللغة. ويمكن تقسيم نسخ النماذج التسع المتبقية إلى فئتين: نسخ التي تستخدم تضمينات اللغة (language embeddings) وتلك التي لا تستخدمها.</p> <h3 class="relative group"><a id="xlm-مع-تضمينات-اللغة" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xlm-مع-تضمينات-اللغة"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>XLM مع تضمينات اللغة</span></h3> <p data-svelte-h="svelte-bncnyh">تستخدم النماذج التالية من XLM تضمينات اللغة لتحديد اللغة المستخدمة أثناء الاستدلال:</p> <ul data-svelte-h="svelte-qme59d"><li><code>FacebookAI/xlm-mlm-ende-1024</code> (نمذجة اللغة المقنعة، الإنجليزية-الألمانية)</li> <li><code>FacebookAI/xlm-mlm-enfr-1024</code> (نمذجة اللغة المقنعة، الإنجليزية-الفرنسية)</li> <li><code>FacebookAI/xlm-mlm-enro-1024</code> (نمذجة اللغة المقنعة، الإنجليزية-الرومانية)</li> <li><code>FacebookAI/xlm-mlm-xnli15-1024</code> (نمذجة اللغة المقنعة، لغات XNLI)</li> <li><code>FacebookAI/xlm-mlm-tlm-xnli15-1024</code> (نمذجة اللغة المقنعة + الترجمة، لغات XNLI)</li> <li><code>FacebookAI/xlm-clm-enfr-1024</code> (نمذجة اللغة السببية، الإنجليزية-الفرنسية)</li> <li><code>FacebookAI/xlm-clm-ende-1024</code> (نمذجة اللغة السببية، الإنجليزية-الألمانية)</li></ul> <p data-svelte-h="svelte-16zsdaz">تُمثل تضمينات اللغة على شكل مصفوفة بنفس شكل <code>input_ids</code> التي يتم تمريره إلى النموذج. وتعتمد القيم في هذه المصفوفات على اللغة المستخدمة ويتم تحديدها بواسطة معاملى المجزىء <code>lang2id</code> و <code>id2lang</code>.</p> <p data-svelte-h="svelte-zjzkob">في هذا المثال، قم بتحميل نسخة <code>FacebookAI/xlm-clm-enfr-1024</code> ( نمذجة اللغة السببية، الإنجليزية-الفرنسية):</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> XLMTokenizer, XLMWithLMHeadModel
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = XLMTokenizer.from_pretrained(<span class="hljs-string">&quot;FacebookAI/xlm-clm-enfr-1024&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = XLMWithLMHeadModel.from_pretrained(<span class="hljs-string">&quot;FacebookAI/xlm-clm-enfr-1024&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-12i2dut">تُظهر خاصية <code>lang2id</code> في المجزىء اللغات وأرقام تعريفها في هذا النموذج:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(tokenizer.lang2id)
{<span class="hljs-string">&#x27;en&#x27;</span>: <span class="hljs-number">0</span>, <span class="hljs-string">&#x27;fr&#x27;</span>: <span class="hljs-number">1</span>}<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gvwrte">بعد ذلك، قم بإنشاء مثال على المدخلات:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>input_ids = torch.tensor([tokenizer.encode(<span class="hljs-string">&quot;Wikipedia was used to&quot;</span>)]) <span class="hljs-comment"># batch size of 1</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-q8k5b2">قم بتعيين معرف اللغة إلى <code>&quot;en&quot;</code> واستخدمه لتحديد تضمين اللغة. وتضمين اللغة عبارة عن مصفوفة مملوءة بـ <code>0</code> لأن هذا هو معرف اللغة الإنجليزية. يجب أن تكون هذه المصفوفة بنفس حجم <code>input_ids</code>.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>language_id = tokenizer.lang2id[<span class="hljs-string">&quot;en&quot;</span>] <span class="hljs-comment"># 0</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>langs = torch.tensor([language_id] * input_ids.shape[<span class="hljs-number">1</span>]) <span class="hljs-comment"># torch.tensor([0, 0, 0, ..., 0])</span>
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># نقوم بإعادة تشكيلها لتكون بالحجم (batch_size، sequence_length)</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>langs = langs.view(<span class="hljs-number">1</span>, -<span class="hljs-number">1</span>) <span class="hljs-comment"># الآن بالحجم [1، sequence_length] (لدينا batch size تساوي 1)</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1n3w08o">الآن يمكنك تمرير <code>input_ids</code> وتضمين اللغة إلى النموذج:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>outputs = model(input_ids, langs=langs)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-y0svvs">يمكن لنص البرنامج النصي <a href="https://github.com/huggingface/transformers/tree/main/examples/pytorch/text-generation/run_generation.py" rel="nofollow">run_generation.py</a> توليد النص باستخدام تضمينات اللغة مع نقاط تفتيش <code>xlm-clm</code>.</p> <h3 class="relative group"><a id="xlm-بدون-تضمينات-اللغة" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xlm-بدون-تضمينات-اللغة"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>XLM بدون تضمينات اللغة</span></h3> <p data-svelte-h="svelte-sqvj6c">النماذج التالية من XLM لا تتطلب تضمينات اللغة أثناء الاستنتاج:</p> <ul data-svelte-h="svelte-u984j0"><li><code>FacebookAI/xlm-mlm-17-1280</code> (نمذجة اللغة المقنعة، 17 لغة)</li> <li><code>FacebookAI/xlm-mlm-100-1280</code> (نمذجة اللغة المقنعة، 100 لغة)</li></ul> <p data-svelte-h="svelte-1v7fvi2">تُستخدم هذه النماذج لتمثيل الجمل العامة، على عكس نسح XLM السابقة.</p> <h2 class="relative group"><a id="bert" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#bert"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>BERT</span></h2> <p data-svelte-h="svelte-wuqpny">يمكن استخدام النماذج التالية من BERT للمهام متعددة اللغات:</p> <ul data-svelte-h="svelte-uyyck3"><li><code>google-bert/bert-base-multilingual-uncased</code> (نمذجة اللغة المقنعة + التنبؤ بالجملة التالية، 102 لغة)</li> <li><code>google-bert/bert-base-multilingual-cased</code> (نمذجة اللغة المقنعة + التنبؤ بالجملة التالية، 104 لغات)</li></ul> <p data-svelte-h="svelte-h1bpfj">لا تتطلب هذه النماذج تضمينات اللغة أثناء الاستدلال. يجب أن تُحدّد اللغة من السياق وتستنتج وفقاً لذلك.</p> <h2 class="relative group"><a id="xlm-roberta" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#xlm-roberta"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>XLM-RoBERTa</span></h2> <p data-svelte-h="svelte-1frphf0">يمكن استخدام النماذج التالية من XLM-RoBERTa للمهام متعددة اللغات:</p> <ul data-svelte-h="svelte-1l2pe6a"><li><code>FacebookAI/xlm-roberta-base</code> (نمذجة اللغة المقنعة، 100 لغة)</li> <li><code>FacebookAI/xlm-roberta-large</code> (نمذجة اللغة المقنعة، 100 لغة)</li></ul> <p data-svelte-h="svelte-1h73pd7">تم تدريب XLM-RoBERTa على 2.5 تيرابايت من بيانات CommonCrawl الجديدة والمحسنة في 100 لغة. ويوفر مكاسب قوية على النماذج متعددة اللغات التي تم إصدارها سابقاً مثل mBERT أو XLM في مهام المصب مثل التصنيف، ووضع العلامات التسلسلية، والأسئلة والأجوبة.</p> <h2 class="relative group"><a id="m2m100" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#m2m100"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>M2M100</span></h2> <p data-svelte-h="svelte-1w8ml31">يمكن استخدام النماذج التالية من M2M100 للترجمة متعددة اللغات:</p> <ul data-svelte-h="svelte-1jzqxs7"><li><code>facebook/m2m100_418M</code> (الترجمة)</li> <li><code>facebook/m2m100_1.2B</code> (الترجمة)</li></ul> <p data-svelte-h="svelte-e8wtbp">في هذا المثال، قم بتحميل نسحة <code>facebook/m2m100_418M</code> لترجمة النص من الصينية إلى الإنجليزية. يمكنك تعيين اللغة المصدر في المجزىء اللغوى:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> M2M100ForConditionalGeneration, M2M100Tokenizer
<span class="hljs-meta">&gt;&gt;&gt; </span>en_text = <span class="hljs-string">&quot;Do not meddle in the affairs of wizards, for they are subtle and quick to anger.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>chinese_text = <span class="hljs-string">&quot;不要插手巫師的事務, 因為他們是微妙的, 很快就會發怒.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = M2M100Tokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/m2m100_418M&quot;</span>, src_lang=<span class="hljs-string">&quot;zh&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = M2M100ForConditionalGeneration.from_pretrained(<span class="hljs-string">&quot;facebook/m2m100_418M&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-w1kdq3">تقسيم النّص إلى رموز:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoded_zh = tokenizer(chinese_text, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1pzhn4e">يجبر M2M100 معرف اللغة الهدف كأول رمز مولد للترجمة إلى اللغة الهدف. قم بتعيين <code>forced_bos_token_id</code> إلى <code>en</code> في طريقة <code>generate</code> للترجمة إلى الإنجليزية:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>generated_tokens = model.generate(**encoded_zh, forced_bos_token_id=tokenizer.get_lang_id(<span class="hljs-string">&quot;en&quot;</span>))
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.batch_decode(generated_tokens, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-string">&#x27;Do not interfere with the matters of the witches, because they are delicate and will soon be angry.&#x27;</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="mbart" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#mbart"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>MBart</span></h2> <p data-svelte-h="svelte-yo3thy">يمكن استخدام النماذج التالية من MBart للترجمة متعددة اللغات:</p> <ul data-svelte-h="svelte-pmfr0h"><li><code>facebook/mbart-large-50-one-to-many-mmt</code> (الترجمة الآلية متعددة اللغات من واحد إلى كثير، 50 لغة)</li> <li><code>facebook/mbart-large-50-many-to-many-mmt</code> (الترجمة الآلية متعددة اللغات من كثير إلى كثير، 50 لغة)</li> <li><code>facebook/mbart-large-50-many-to-one-mmt</code> (الترجمة الآلية متعددة اللغات من كثير إلى واحد، 50 لغة)</li> <li><code>facebook/mbart-large-50</code> (الترجمة متعددة اللغات، 50 لغة)</li> <li><code>facebook/mbart-large-cc25</code></li></ul> <p data-svelte-h="svelte-120qox6">في هذا المثال، قم بتحميل نسخة <code>facebook/mbart-large-50-many-to-many-mmt</code> لترجمة النص من الفنلندية إلى الإنجليزية. يمكنك تعيين اللغة المصدر في المجزىء:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, AutoModelForSeq2SeqLM
<span class="hljs-meta">&gt;&gt;&gt; </span>en_text = <span class="hljs-string">&quot;Do not meddle in the affairs of wizards, for they are subtle and quick to anger.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>fi_text = <span class="hljs-string">&quot;Älä sekaannu velhojen asioihin, sillä ne ovat hienovaraisia ja nopeasti vihaisia.&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">&quot;facebook/mbart-large-50-many-to-many-mmt&quot;</span>, src_lang=<span class="hljs-string">&quot;fi_FI&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForSeq2SeqLM.from_pretrained(<span class="hljs-string">&quot;facebook/mbart-large-50-many-to-many-mmt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-w1kdq3">تقسيم النّص إلى رموز:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoded_en = tokenizer(en_text, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gqhmz6">يجبر MBart معرف لغة الهدف كأول رمز مولد للترجمة إلى اللغة الهدف. قم بتعيين <code>forced_bos_token_id</code> إلى <code>en</code> في طريقة <code>generate</code> للترجمة إلى الإنجليزية:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>generated_tokens = model.generate(**encoded_en, forced_bos_token_id=tokenizer.lang_code_to_id[<span class="hljs-string">&quot;en_XX&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.batch_decode(generated_tokens, skip_special_tokens=<span class="hljs-literal">True</span>)
<span class="hljs-string">&quot;Don&#x27;t interfere with the wizard&#x27;s affairs, because they are subtle, will soon get angry.&quot;</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1jqybb2">إذا كنت تستخدم نسخة <code>facebook/mbart-large-50-many-to-one-mmt</code>، فلا تحتاج إلى إجبار معرف لغة الهدف كأول رمز مولد، وإلا فإن الاستخدام هو نفسه.</p> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ar/multilingual.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1x8g0d4 = {
assets: "/docs/transformers/pr_33913/ar",
base: "/docs/transformers/pr_33913/ar",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/pr_33913/ar/_app/immutable/entry/start.a2b1508c.js"),
import("/docs/transformers/pr_33913/ar/_app/immutable/entry/app.72eb63b7.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 21],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
41 kB
·
Xet hash:
91f30ec5d106eb2ac3081feae824bdaee765d3d63641273c5d7b0f2c3364c5d2

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.