Buckets:

hf-doc-build
/

doc-dev

hf-doc-build/doc-dev / transformers /pr_33174 /ko /_app /immutable /nodes /28.acdbfbdb.js

rtrm's picture

about 2 months ago

97.6 kB

	import{s as pa,o as ua,n as re}from"../chunks/scheduler.56730f09.js";import{S as ha,i as fa,g as i,s as n,r as p,A as ga,h as l,f as o,c as a,j as M,u,x as m,k as x,y as s,a as r,v as h,d as f,t as g,w as _}from"../chunks/index.1f144517.js";import{T as xo}from"../chunks/Tip.41e845e5.js";import{D as q,E as zo}from"../chunks/ExampleCodeBlock.e82611a0.js";import{C as Wt}from"../chunks/CodeBlock.738eeccb.js";import{P as _a}from"../chunks/PipelineTag.82d6c31e.js";import{H as D,E as ba}from"../chunks/EditOnGithub.854793f1.js";function ka(C){let d,v="<code>Llama2</code> 모델은 <code>bfloat16</code>을 사용하여 훈련되었지만, 원래 추론은 <code>float16</code>을 사용합니다. 허브에 업로드된 체크포인트는 <code>torch_dtype = 'float16'</code>을 사용하며, 이는 <code>AutoModel</code> API에 의해 체크포인트를 <code>torch.float32</code>에서 <code>torch.float16</code>으로 캐스팅하는 데 사용됩니다.",c,k,w="온라인 가중치의 <code>dtype</code>은 <code>model = AutoModelForCausalLM.from_pretrained("path", torch_dtype = "auto")</code>를 사용하여 모델을 초기화할 때 <code>torch_dtype="auto"</code>를 사용하지 않는 한 대부분 관련이 없습니다. 그 이유는 모델이 먼저 다운로드될 것이고 (온라인 체크포인트의 <code>dtype</code>을 사용하여) 그다음에 기본 <code>dtype</code>인 <code>torch</code>로 캐스팅하고(<code>torch.float32</code>가 됨), 마지막으로 구성(configuration)에서 제공된 <code>torch_dtype</code>이 있는 경우 이를 사용하기 때문입니다.",b,T,ie="모델을 <code>float16</code>에서 훈련하는 것은 권장되지 않으며 <code>nan</code>을 생성하는 것으로 알려져 있습니다. 
따라서 모델은 <code>bfloat16</code>에서 훈련되어야 합니다.";return{c(){d=i("p"),d.innerHTML=v,c=n(),k=i("p"),k.innerHTML=w,b=n(),T=i("p"),T.innerHTML=ie},l($){d=l($,"P",{"data-svelte-h":!0}),m(d)!=="svelte-17o3s0r"&&(d.innerHTML=v),c=a($),k=l($,"P",{"data-svelte-h":!0}),m(k)!=="svelte-twggkk"&&(k.innerHTML=w),b=a($),T=l($,"P",{"data-svelte-h":!0}),m(T)!=="svelte-ulakwv"&&(T.innerHTML=ie)},m($,Z){r($,d,Z),r($,c,Z),r($,k,Z),r($,b,Z),r($,T,Z)},p:re,d($){$&&(o(d),o(c),o(k),o(b),o(T))}}}function va(C){let d,v;return d=new Wt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMExsYW1hTW9kZWwlMkMlMjBMbGFtYUNvbmZpZyUwQSUwQSUyMyUyMEluaXRpYWxpemluZyUyMGElMjBMTGFNQSUyMGxsYW1hLTdiJTIwc3R5bGUlMjBjb25maWd1cmF0aW9uJTBBY29uZmlndXJhdGlvbiUyMCUzRCUyMExsYW1hQ29uZmlnKCklMEElMEElMjMlMjBJbml0aWFsaXppbmclMjBhJTIwbW9kZWwlMjBmcm9tJTIwdGhlJTIwbGxhbWEtN2IlMjBzdHlsZSUyMGNvbmZpZ3VyYXRpb24lMEFtb2RlbCUyMCUzRCUyMExsYW1hTW9kZWwoY29uZmlndXJhdGlvbiklMEElMEElMjMlMjBBY2Nlc3NpbmclMjB0aGUlMjBtb2RlbCUyMGNvbmZpZ3VyYXRpb24lMEFjb25maWd1cmF0aW9uJTIwJTNEJTIwbW9kZWwuY29uZmln",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> LlamaModel, LlamaConfig

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a LLaMA llama-7b style configuration</span>
	<span class="hljs-meta">>>> </span>configuration = LlamaConfig()

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Initializing a model from the llama-7b style configuration</span>
	<span class="hljs-meta">>>> </span>model = LlamaModel(configuration)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Accessing the model configuration</span>
	<span class="hljs-meta">>>> </span>configuration = model.config`,wrap:!1}}),{c(){p(d.$$.fragment)},l(c){u(d.$$.fragment,c)},m(c,k){h(d,c,k),v=!0},p:re,i(c){v\|\|(f(d.$$.fragment,c),v=!0)},o(c){g(d.$$.fragment,c),v=!1},d(c){_(d,c)}}}function ya(C){let d,v="sequence pair mask has the following format:",c,k,w;return k=new Wt({props:{code:"MCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAwJTIwMCUyMDAlMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMjAxJTIwMSUyMDElMEElN0MlMjBmaXJzdCUyMHNlcXVlbmNlJTIwJTIwJTIwJTIwJTdDJTIwc2Vjb25kJTIwc2VxdWVuY2UlMjAlN0M=",highlighted:`0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 0 </span>0<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1<span class="hljs-number"> 1 </span>1 1
	\| first sequence \| second sequence \|`,wrap:!1}}),{c(){d=i("p"),d.textContent=v,c=n(),p(k.$$.fragment)},l(b){d=l(b,"P",{"data-svelte-h":!0}),m(d)!=="svelte-16klr56"&&(d.textContent=v),c=a(b),u(k.$$.fragment,b)},m(b,T){r(b,d,T),r(b,c,T),h(k,b,T),w=!0},p:re,i(b){w\|\|(f(k.$$.fragment,b),w=!0)},o(b){g(k.$$.fragment,b),w=!1},d(b){b&&(o(d),o(c)),_(k,b)}}}function La(C){let d,v;return d=new Wt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMExsYW1hVG9rZW5pemVyRmFzdCUwQSUwQXRva2VuaXplciUyMCUzRCUyMExsYW1hVG9rZW5pemVyRmFzdC5mcm9tX3ByZXRyYWluZWQoJTIyaGYtaW50ZXJuYWwtdGVzdGluZyUyRmxsYW1hLXRva2VuaXplciUyMiklMEF0b2tlbml6ZXIuZW5jb2RlKCUyMkhlbGxvJTIwdGhpcyUyMGlzJTIwYSUyMHRlc3QlMjIp",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> LlamaTokenizerFast

	<span class="hljs-meta">>>> </span>tokenizer = LlamaTokenizerFast.from_pretrained(<span class="hljs-string">"hf-internal-testing/llama-tokenizer"</span>)
	<span class="hljs-meta">>>> </span>tokenizer.encode(<span class="hljs-string">"Hello this is a test"</span>)
	[<span class="hljs-number">1</span>, <span class="hljs-number">15043</span>, <span class="hljs-number">445</span>, <span class="hljs-number">338</span>, <span class="hljs-number">263</span>, <span class="hljs-number">1243</span>]`,wrap:!1}}),{c(){p(d.$$.fragment)},l(c){u(d.$$.fragment,c)},m(c,k){h(d,c,k),v=!0},p:re,i(c){v\|\|(f(d.$$.fragment,c),v=!0)},o(c){g(d.$$.fragment,c),v=!1},d(c){_(d,c)}}}function Ta(C){let d,v=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){d=i("p"),d.innerHTML=v},l(c){d=l(c,"P",{"data-svelte-h":!0}),m(d)!=="svelte-fincs2"&&(d.innerHTML=v)},m(c,k){r(c,d,k)},p:re,d(c){c&&o(d)}}}function $a(C){let d,v=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){d=i("p"),d.innerHTML=v},l(c){d=l(c,"P",{"data-svelte-h":!0}),m(d)!=="svelte-fincs2"&&(d.innerHTML=v)},m(c,k){r(c,d,k)},p:re,d(c){c&&o(d)}}}function wa(C){let d,v="Example:",c,k,w;return k=new Wt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Ub2tlbml6ZXIlMkMlMjBMbGFtYUZvckNhdXNhbExNJTBBJTBBbW9kZWwlMjAlM0QlMjBMbGFtYUZvckNhdXNhbExNLmZyb21fcHJldHJhaW5lZCglMjJtZXRhLWxsYW1hJTJGTGxhbWEtMi03Yi1oZiUyMiklMEF0b2tlbml6ZXIlMjAlM0QlMjBBdXRvVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjJtZXRhLWxsYW1hJTJGTGxhbWEtMi03Yi1oZiUyMiklMEElMEFwcm9tcHQlMjAlM0QlMjAlMjJIZXklMkMlMjBhcmUlMjB5b3UlMjBjb25zY2lvdXMlM0YlMjBDYW4lMjB5b3UlMjB0YWxrJTIwdG8lMjBtZSUzRiUyMiUwQWlucHV0cyUyMCUzRCUyMHRva2VuaXplcihwcm9tcHQlMkMlMjByZXR1cm5fdGVuc29ycyUzRCUyMnB0JTIyKSUwQSUwQSUyMyUyMEdlbmVyYXRlJTBBZ2VuZXJhdGVfaWRzJTIwJTNEJTIwbW9kZWwuZ2VuZXJhdGUoaW5wdXRzLmlucHV0X2lkcyUyQyUyMG1heF9sZW5ndGglM0QzMCklMEF0b2tlbml6ZXIuYmF0Y2hfZGVjb2RlKGdlbmVyYXRlX2lkcyUyQyUyMHNraXBfc3BlY2lhbF90b2tlbnMlM0RUcnVlJTJDJTIwY2xlYW5fdXBfdG9rZW5pemF0aW9uX3NwYWNlcyUzREZhbHNlKSU1QjAlNUQ=",highlighted:`<span class="hljs-meta">>>> </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoTokenizer, LlamaForCausalLM

	<span class="hljs-meta">>>> </span>model = LlamaForCausalLM.from_pretrained(<span class="hljs-string">"meta-llama/Llama-2-7b-hf"</span>)
	<span class="hljs-meta">>>> </span>tokenizer = AutoTokenizer.from_pretrained(<span class="hljs-string">"meta-llama/Llama-2-7b-hf"</span>)

	<span class="hljs-meta">>>> </span>prompt = <span class="hljs-string">"Hey, are you conscious? Can you talk to me?"</span>
	<span class="hljs-meta">>>> </span>inputs = tokenizer(prompt, return_tensors=<span class="hljs-string">"pt"</span>)

	<span class="hljs-meta">>>> </span><span class="hljs-comment"># Generate</span>
	<span class="hljs-meta">>>> </span>generate_ids = model.generate(inputs.input_ids, max_length=<span class="hljs-number">30</span>)
	<span class="hljs-meta">>>> </span>tokenizer.batch_decode(generate_ids, skip_special_tokens=<span class="hljs-literal">True</span>, clean_up_tokenization_spaces=<span class="hljs-literal">False</span>)[<span class="hljs-number">0</span>]
	<span class="hljs-string">"Hey, are you conscious? Can you talk to me?\\nI'm not conscious, but I can talk to you."</span>`,wrap:!1}}),{c(){d=i("p"),d.textContent=v,c=n(),p(k.$$.fragment)},l(b){d=l(b,"P",{"data-svelte-h":!0}),m(d)!=="svelte-11lpom8"&&(d.textContent=v),c=a(b),u(k.$$.fragment,b)},m(b,T){r(b,d,T),r(b,c,T),h(k,b,T),w=!0},p:re,i(b){w\|\|(f(k.$$.fragment,b),w=!0)},o(b){g(k.$$.fragment,b),w=!1},d(b){b&&(o(d),o(c)),_(k,b)}}}function Ma(C){let d,v=`Although the recipe for forward pass needs to be defined within this function, one should call the <code>Module</code>
	instance afterwards instead of this since the former takes care of running the pre and post processing steps while
	the latter silently ignores them.`;return{c(){d=i("p"),d.innerHTML=v},l(c){d=l(c,"P",{"data-svelte-h":!0}),m(d)!=="svelte-fincs2"&&(d.innerHTML=v)},m(c,k){r(c,d,k)},p:re,d(c){c&&o(d)}}}function xa(C){let d,v,c,k,w,b,T,ie,$,Z='Llama2 모델은 Hugo Touvron, Louis Martin, Kevin Stone, Peter Albert, Amjad Almahairi, Ya1smine Babaei, Nikolay Bashlykov, Soumya Batra, Prajjwal Bhargava, Shruti Bhosale, Dan Bikel, Lukas Blecher, Cristian Canton Ferrer, Moya Chen, Guillem Cucurull, David Esiobu, Jude Fernandes, Jeremy Fu, Wenyin Fu, Brian Fuller, Cynthia Gao, Vedanuj Goswami, Naman Goyal, Anthony Hartshorn, Saghar Hosseini, Rui Hou, Hakan Inan, Marcin Kardas, Viktor Kerkez Madian Khabsa, Isabel Kloumann, Artem Korenev, Punit Singh Koura, Marie-Anne Lachaux, Thibaut Lavril, Jenya Lee, Diana Liskovich, Yinghai Lu, Yuning Mao, Xavier Martinet, Todor Mihaylov, Pushkar Mishra, Igor Molybog, Yixin Nie, Andrew Poulton, Jeremy Reizenstein, Rashi Rungta, Kalyan Saladi, Alan Schelten, Ruan Silva, Eric Michael Smith, Ranjan Subramanian, Xiaoqing EllenTan, Binh Tang, Ross Taylor, Adina Williams, Jian Xiang Kuan, Puxin Xu, Zheng Yan, Iliyan Zarov, Yuchen Zhang, Angela Fan, Melanie Kambadur, Sharan Narang, Aurelien Rodriguez, Robert Stojnic, Sergey Edunov, Thomas Scialom의 논문 <a href="https://ai.meta.com/research/publications/llama-2-open-foundation-and-fine-tuned-chat-models/" rel="nofollow">LLaMA: Open Foundation and Fine-Tuned Chat Models</a>에서 제안되었습니다. 채팅 어플리케이션에 맞게 미세 조정된 체크포인트를 포함된 7B에서 70B 범위의 매개변수를 가진 기초 언어 모델 모음입니다!',Et,le,yn="논문의 초록은 다음과 같습니다:",St,de,Ln="<em>이 연구에서 우리는 70억에서 700억 파라미터의 범위에서 사전 훈련 및 미세 조정된 대규모 언어 모델(LLMs)의 모음인 Llama 2를 개발 및 공개합니다. Llama 2-Chat라고 불리는 미세 조정된 LLMs은 대화 사용 사례에 최적화되었습니다. 우리의 모델은 테스트한 대부분의 벤치마크에서 오픈 소스 채팅 모델보다 성능이 뛰어나며, 유용성과 안전성에 대한 인적 평가를 바탕으로 비공개 소스 모델을 대체할 수 있는 적절한 대안이 될 수 있습니다. 
우리는 Llama 2-Chat의 미세 조정 및 안전성 향상의 접근 방식에 대한 자세한 설명을 제공하여 커뮤니티가 우리의 작업을 기반으로 LLMs의 책임있는 개발에 기여할 수 있도록 합니다.</em>",Nt,ce,Tn='<a href="https://huggingface.co/models?search=llama2" rel="nofollow">여기</a>에서 모든 Llama2 모델을 확인할 수 있습니다.',Jt,B,Rt,me,$n="🍯 팁:",Gt,pe,wn='<li>Llama2 모델의 가중치는 <a href="https://ai.meta.com/resources/models-and-libraries/llama-downloads/" rel="nofollow">이 양식</a>을 작성하여 얻을 수 있습니다.</li> <li>아키텍처는 처음 버전의 Llama와 매우 유사하며, <a href="https://arxiv.org/pdf/2305.13245.pdf" rel="nofollow">이 논문</a>의 내용에 따라 Grouped Query Attention (GQA)이 추가되었습니다.</li> <li><code>config.pretraining_tp</code>를 1과 다른 값으로 설정하면 더 정확하지만 느린 선형 레이어 계산이 활성화되어 원본 로짓과 더 잘 일치하게 됩니다.</li> <li>원래 모델은 <code>pad_id = -1</code>을 사용하는데, 이는 패딩 토큰이 없음을 의미합니다. 동일한 로직을 사용할 수 없으므로 <code>tokenizer.add_special_tokens({"pad_token":"<pad>"})</code>를 사용하여 패딩 토큰을 추가하고 이에 따라 토큰 임베딩 크기를 조정해야 합니다. 또한 <code>model.config.pad_token_id</code>를 설정해야 합니다. 모델의 <code>embed_tokens</code> 레이어는 <code>self.embed_tokens = nn.Embedding(config.vocab_size, config.hidden_size, self.config.padding_idx)</code>로 초기화되어, 패딩 토큰 인코딩이 0을 출력하도록 합니다. 따라서 초기화 시에 전달하는 것을 권장합니다.</li> <li>양식을 작성하고 모델 체크포인트 접근 권한을 얻은 후에는 이미 변환된 체크포인트를 사용할 수 있습니다. 그렇지 않고 자신의 모델을 직접 변환하려는 경우, <a href="https://github.com/huggingface/transformers/blob/main/src/transformers/models/llama/convert_llama_weights_to_hf.py" rel="nofollow">변환 스크립트</a>를 자유롭게 사용하세요. 스크립트는 다음과 같은 예시의 명령어로 호출할 수 있습니다:</li>',Dt,ue,Zt,he,Mn="<li>변환 후 모델과 토크나이저는 다음과 같이 로드할 수 있습니다:</li>",Bt,fe,Vt,ge,xn="스크립트를 실행하려면 모델을 float16 정밀도로 전부 호스트할 수 있을 만큼 충분한 CPU RAM이 필요합니다 (가장 큰 버전이 여러 체크포인트로 제공되더라도 각 체크포인트는 모델 가중치의 일부만을 포함하므로 모두 RAM에 로드해야 합니다). 75B 모델의 경우, 총 145GB의 RAM이 필요합니다.",Xt,_e,zn='<li>LLaMA 토크나이저는 <a href="https://github.com/google/sentencepiece" rel="nofollow">sentencepiece</a>를 기반으로 한 BPE 모델입니다. 
sentencepiece의 특징 중 하나는 시퀀스를 디코딩할 때 첫 번째 토큰이 단어의 시작이면 (예: “Banana”) 토크나이저는 문자열 앞에 접두사 공간을 추가하지 않는 것입니다.</li>',Yt,be,Cn='이 모델은 <a href="https://huggingface.co/ArthurZ" rel="nofollow">Arthur Zucker</a>가 <a href="https://huggingface.co/lysandre" rel="nofollow">Lysandre Debut</a>의 도움을 받아 제공하였습니다. Hugging Face에서의 구현 코드는 <a href="https://github.com/EleutherAI/gpt-neox" rel="nofollow">여기</a>의 GPT-NeoX 를 기반으로 합니다. 저자의 원래 코드는 <a href="https://github.com/facebookresearch/llama" rel="nofollow">여기</a>에서 찾을 수 있습니다.',Ot,ke,Qt,ve,qn="LLaMA2를 시작하는 데 도움이 될 Hugging Face의 공식 및 커뮤니티(🌎로 표시) 리소스 목록입니다. 여기에 새로운 리소스를 추가하기 위해서 Pull Request를 열어 주시면 검토하겠습니다! 리소스는 기존 리소스와 중복되지 않는 새로운 것을 보여주는 것이 이상적입니다.",Kt,ye,Fn='<li><a href="https://huggingface.co/blog/llama2" rel="nofollow">Llama 2 is here - get it on Hugging Face</a>, Llama 2에 관한 블로그 포스트와 🤗 Transformers 및 🤗 PEFT와 함께 사용하는 방법에 대한 내용입니다.</li> <li><a href="https://www.philschmid.de/llama-2" rel="nofollow">LLaMA 2 - Every Resource you need</a>, LLaMA 2에 대해 알아보고 빠르게 시작하는 데 필요한 관련 리소스의 모음입니다.</li>',eo,Le,to,Te,jn='<li>Google Colab에서 QLoRA와 4-bit 정밀도를 사용하여 Llama 2를 미세 조정하는 방법에 대한 <a href="https://colab.research.google.com/drive/1PEQyJO1-f6j0S_XJ8DV50NkpzasXkrzd?usp=sharing" rel="nofollow">노트북</a>입니다. 🌎</li> <li>“Llama-v2-7b-guanaco” 모델을 4-bit QLoRA로 미세 조정하고 PDF에서 Q&A 데이터셋을 생성하는 방법에 대한 <a href="https://colab.research.google.com/drive/134o_cXcMe_lsvl15ZE_4Y75Kstepsntu?usp=sharing" rel="nofollow">노트북</a>입니다. 
🌎</li>',oo,$e,In="⚗️ 최적화",no,we,Pn='<li><a href="https://huggingface.co/blog/dpo-trl" rel="nofollow">Llama 2를 DPO로 미세 조정하기</a>, TRL 라이브러리의 DPO 방법을 사용하여 특정 데이터셋에서 Llama 2를 미세 조정하는 방법을 안내하는 가이드입니다.</li> <li><a href="https://www.philschmid.de/instruction-tune-llama-2" rel="nofollow">확장 가이드: Llama 2 명령어 조정</a>, 입력에서 명령어를 생성하도록 Llama 2를 훈련시키는 방법을 안내하는 가이드로, 명령어를 따르는 모델에서 명령어를 주는 모델로 변환합니다.</li> <li>개인 컴퓨터에서 QLoRA와 TRL을 사용하여 Llama 2 모델을 미세 조정하는 방법에 대한 <a href="https://colab.research.google.com/drive/1SYpgFpcmtIUzdE7pxqknrM4ArCASfkFQ?usp=sharing" rel="nofollow">노트북</a>입니다. 🌎</li>',ao,Me,Wn="⚡️ 추론",so,xe,Hn='<li>AutoGPTQ 라이브러리의 GPTQ를 사용하여 Llama 2 모델을 양자화하는 방법에 대한 <a href="https://colab.research.google.com/drive/1TC56ArKerXUpbgRy5vM3woRsbTEVNq7h?usp=sharing" rel="nofollow">노트북</a>입니다. 🌎</li> <li>로컬 컴퓨터나 Google Colab에서 4-bit 양자화로 Llama 2 채팅 모델을 실행하는 방법에 대한 <a href="https://colab.research.google.com/drive/1X1z9Q6domMKl2CnEM0QGHNwidLfR4dW2?usp=sharing" rel="nofollow">노트북</a>입니다. 🌎</li>',ro,ze,Un="🚀 배포",io,Ce,An='<li><a href="https://www.philschmid.de/sagemaker-llama2-qlora" rel="nofollow">Amazon SageMaker에서 LLaMA 2 (7-70B) 미세 조정하기</a>, Amazon SageMaker에서 QLoRA 미세 조정 및 배포에 이르기까지의 완전한 가이드입니다.</li> <li><a href="https://www.philschmid.de/sagemaker-llama-llm" rel="nofollow">Amazon SageMaker에서 Llama 2 7B/13B/70B 배포하기</a>, 안전하고 확장 가능한 배포를 위해 Hugging Face의 LLM DLC 컨테이너를 사용하는 방법에 대한 가이드입니다.</li>',lo,qe,co,W,Fe,Co,nt,En=`This is the configuration class to store the configuration of a <a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaModel">LlamaModel</a>. It is used to instantiate an LLaMA
	model according to the specified arguments, defining the model architecture. Instantiating a configuration with the
	defaults will yield a similar configuration to that of the LLaMA-7B.`,qo,at,Sn=`Configuration objects inherit from <code>PretrainedConfig</code> and can be used to control the model outputs. Read the
	documentation from <code>PretrainedConfig</code> for more information.`,Fo,V,mo,je,po,F,Ie,jo,st,Nn=`Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding. The default padding token is unset as there is
	no padding token in the original model.`,Io,rt,Pe,Po,X,We,Wo,it,Jn=`Retrieve sequence ids from a token list that has no special tokens added. This method is called when adding
	special tokens using the tokenizer <code>prepare_for_model</code> method.`,Ho,H,He,Uo,lt,Rn="Creates a mask from the two sequences passed to be used in a sequence-pair classification task. An ALBERT",Ao,Y,Eo,dt,Gn="if token_ids_1 is None, only returns the first portion of the mask (0s).",So,O,Ue,No,ct,Dn="Save the vocabulary and special tokens file to a directory.",uo,Ae,ho,y,Ee,Jo,mt,Zn="Construct a Llama tokenizer. Based on byte-level Byte-Pair-Encoding.",Ro,pt,Bn="This uses notably ByteFallback and no normalization.",Go,Q,Do,ut,Vn=`If you want to change the <code>bos_token</code> or the <code>eos_token</code>, make sure to specify them when initializing the model, or
	call <code>tokenizer.update_post_processor()</code> to make sure that the post-processing is correctly done (otherwise the
	values of the first token and final token of an encoded sequence will not be correct). For more details, checkout
	[post-processors] (<a href="https://huggingface.co/docs/tokenizers/api/post-processors" rel="nofollow">https://huggingface.co/docs/tokenizers/api/post-processors</a>) documentation.`,Zo,ht,Xn=`This tokenizer inherits from <code>PreTrainedTokenizerFast</code> which contains most of the main methods. Users should
	refer to this superclass for more information regarding those methods.`,Bo,ft,Se,Vo,K,Ne,Xo,gt,Yn=`Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
	special tokens using the tokenizer <code>prepare_for_model</code> or <code>encode_plus</code> methods.`,Yo,S,Je,Oo,_t,On=`Create the token type IDs corresponding to the sequences passed. <a href="../glossary#token-type-ids">What are token type
	IDs?</a>`,Qo,bt,Qn="Should be overridden in a subclass if the model has a special way of building those.",Ko,ee,Re,en,kt,Kn="Updates the underlying post processor with the current <code>bos_token</code> and <code>eos_token</code>.",tn,vt,Ge,fo,De,go,j,Ze,on,yt,ea=`The bare LLaMA Model outputting raw hidden-states without any specific head on top.
	This model inherits from <code>PreTrainedModel</code>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,nn,Lt,ta=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,an,Tt,oa="Transformer decoder consisting of <em>config.num_hidden_layers</em> layers. Each layer is a <code>LlamaDecoderLayer</code>",sn,N,Be,rn,$t,na='The <a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaModel">LlamaModel</a> forward method, overrides the <code>__call__</code> special method.',ln,te,_o,Ve,bo,G,Xe,dn,U,Ye,cn,wt,aa='The <a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaForCausalLM">LlamaForCausalLM</a> forward method, overrides the <code>__call__</code> special method.',mn,oe,pn,ne,ko,Oe,vo,z,Qe,un,Mt,sa="The LLaMa Model transformer with a sequence classification head on top (linear layer).",hn,xt,ra=`<a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaForSequenceClassification">LlamaForSequenceClassification</a> uses the last token in order to do the classification, as other causal models
	(e.g. GPT-2) do.`,fn,zt,ia=`Since it does classification on the last token, it requires to know the position of the last token. If a
	<code>pad_token_id</code> is defined in the configuration, it finds the last token that is not a padding token in each row. If
	no <code>pad_token_id</code> is defined, it simply takes the last value in each row of the batch. Since it cannot guess the
	padding tokens when <code>inputs_embeds</code> are passed instead of <code>input_ids</code>, it does the same (take the last value in
	each row of the batch).`,gn,Ct,la=`This model inherits from <code>PreTrainedModel</code>. Check the superclass documentation for the generic methods the
	library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
	etc.)`,_n,qt,da=`This model is also a PyTorch <a href="https://pytorch.org/docs/stable/nn.html#torch.nn.Module" rel="nofollow">torch.nn.Module</a> subclass.
	Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage
	and behavior.`,bn,J,Ke,kn,Ft,ca='The <a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaForSequenceClassification">LlamaForSequenceClassification</a> forward method, overrides the <code>__call__</code> special method.',vn,ae,yo,et,Lo,Ht,To;return w=new D({props:{title:"Llama2",local:"llama2",headingTag:"h1"}}),T=new D({props:{title:"개요",local:"overview",headingTag:"h2"}}),B=new xo({props:{warning:!0,$$slots:{default:[ka]},$$scope:{ctx:C}}}),ue=new Wt({props:{code:"cHl0aG9uJTIwc3JjJTJGdHJhbnNmb3JtZXJzJTJGbW9kZWxzJTJGbGxhbWElMkZjb252ZXJ0X2xsYW1hX3dlaWdodHNfdG9faGYucHklMjAlNUMlMEElMjAlMjAlMjAlMjAtLWlucHV0X2RpciUyMCUyRnBhdGglMkZ0byUyRmRvd25sb2FkZWQlMkZsbGFtYSUyRndlaWdodHMlMjAtLW1vZGVsX3NpemUlMjA3QiUyMC0tb3V0cHV0X2RpciUyMCUyRm91dHB1dCUyRnBhdGg=",highlighted:`python src/transformers/models/llama/convert_llama_weights_to_hf.py \\
	--input_dir /path/to/downloaded/llama/weights --model_size 7B --output_dir /output/path`,wrap:!1}}),fe=new Wt({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMExsYW1hRm9yQ2F1c2FsTE0lMkMlMjBMbGFtYVRva2VuaXplciUwQSUwQXRva2VuaXplciUyMCUzRCUyMExsYW1hVG9rZW5pemVyLmZyb21fcHJldHJhaW5lZCglMjIlMkZvdXRwdXQlMkZwYXRoJTIyKSUwQW1vZGVsJTIwJTNEJTIwTGxhbWFGb3JDYXVzYWxMTS5mcm9tX3ByZXRyYWluZWQoJTIyJTJGb3V0cHV0JTJGcGF0aCUyMik=",highlighted:`<span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> LlamaForCausalLM, LlamaTokenizer

	tokenizer = LlamaTokenizer.from_pretrained(<span class="hljs-string">"/output/path"</span>)
	model = LlamaForCausalLM.from_pretrained(<span class="hljs-string">"/output/path"</span>)`,wrap:!1}}),ke=new D({props:{title:"리소스",local:"resources",headingTag:"h2"}}),Le=new _a({props:{pipeline:"text-generation"}}),qe=new D({props:{title:"LlamaConfig",local:"llamaconfig ][ transformers.LlamaConfig",headingTag:"h2"}}),Fe=new q({props:{name:"class transformers.LlamaConfig",anchor:"transformers.LlamaConfig",parameters:[{name:"vocab_size",val:" = 32000"},{name:"hidden_size",val:" = 4096"},{name:"intermediate_size",val:" = 11008"},{name:"num_hidden_layers",val:" = 32"},{name:"num_attention_heads",val:" = 32"},{name:"num_key_value_heads",val:" = None"},{name:"hidden_act",val:" = 'silu'"},{name:"max_position_embeddings",val:" = 2048"},{name:"initializer_range",val:" = 0.02"},{name:"rms_norm_eps",val:" = 1e-06"},{name:"use_cache",val:" = True"},{name:"pad_token_id",val:" = None"},{name:"bos_token_id",val:" = 1"},{name:"eos_token_id",val:" = 2"},{name:"pretraining_tp",val:" = 1"},{name:"tie_word_embeddings",val:" = False"},{name:"rope_theta",val:" = 10000.0"},{name:"rope_scaling",val:" = None"},{name:"attention_bias",val:" = False"},{name:"attention_dropout",val:" = 0.0"},{name:"mlp_bias",val:" = False"},{name:"head_dim",val:" = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.LlamaConfig.vocab_size",description:`<strong>vocab_size</strong> (<code>int</code>, <em>optional</em>, defaults to 32000) —
	Vocabulary size of the LLaMA model. Defines the number of different tokens that can be represented by the
	<code>inputs_ids</code> passed when calling <a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaModel">LlamaModel</a>`,name:"vocab_size"},{anchor:"transformers.LlamaConfig.hidden_size",description:`<strong>hidden_size</strong> (<code>int</code>, <em>optional</em>, defaults to 4096) —
	Dimension of the hidden representations.`,name:"hidden_size"},{anchor:"transformers.LlamaConfig.intermediate_size",description:`<strong>intermediate_size</strong> (<code>int</code>, <em>optional</em>, defaults to 11008) —
	Dimension of the MLP representations.`,name:"intermediate_size"},{anchor:"transformers.LlamaConfig.num_hidden_layers",description:`<strong>num_hidden_layers</strong> (<code>int</code>, <em>optional</em>, defaults to 32) —
	Number of hidden layers in the Transformer decoder.`,name:"num_hidden_layers"},{anchor:"transformers.LlamaConfig.num_attention_heads",description:`<strong>num_attention_heads</strong> (<code>int</code>, <em>optional</em>, defaults to 32) —
	Number of attention heads for each attention layer in the Transformer decoder.`,name:"num_attention_heads"},{anchor:"transformers.LlamaConfig.num_key_value_heads",description:`<strong>num_key_value_heads</strong> (<code>int</code>, <em>optional</em>) —
	This is the number of key_value heads that should be used to implement Grouped Query Attention. If
	<code>num_key_value_heads=num_attention_heads</code>, the model will use Multi Head Attention (MHA), if
	<code>num_key_value_heads=1</code> the model will use Multi Query Attention (MQA) otherwise GQA is used. When
	converting a multi-head checkpoint to a GQA checkpoint, each group key and value head should be constructed
	by meanpooling all the original heads within that group. For more details checkout <a href="https://arxiv.org/pdf/2305.13245.pdf" rel="nofollow">this
	paper</a>. If it is not specified, will default to
	<code>num_attention_heads</code>.`,name:"num_key_value_heads"},{anchor:"transformers.LlamaConfig.hidden_act",description:`<strong>hidden_act</strong> (<code>str</code> or <code>function</code>, <em>optional</em>, defaults to <code>"silu"</code>) —
	The non-linear activation function (function or string) in the decoder.`,name:"hidden_act"},{anchor:"transformers.LlamaConfig.max_position_embeddings",description:`<strong>max_position_embeddings</strong> (<code>int</code>, <em>optional</em>, defaults to 2048) —
	The maximum sequence length that this model might ever be used with. Llama 1 supports up to 2048 tokens,
	Llama 2 up to 4096, CodeLlama up to 16384.`,name:"max_position_embeddings"},{anchor:"transformers.LlamaConfig.initializer_range",description:`<strong>initializer_range</strong> (<code>float</code>, <em>optional</em>, defaults to 0.02) —
	The standard deviation of the truncated_normal_initializer for initializing all weight matrices.`,name:"initializer_range"},{anchor:"transformers.LlamaConfig.rms_norm_eps",description:`<strong>rms_norm_eps</strong> (<code>float</code>, <em>optional</em>, defaults to 1e-06) —
	The epsilon used by the rms normalization layers.`,name:"rms_norm_eps"},{anchor:"transformers.LlamaConfig.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not the model should return the last key/values attentions (not used by all models). Only
	relevant if <code>config.is_decoder=True</code>.`,name:"use_cache"},{anchor:"transformers.LlamaConfig.pad_token_id",description:`<strong>pad_token_id</strong> (<code>int</code>, <em>optional</em>) —
	Padding token id.`,name:"pad_token_id"},{anchor:"transformers.LlamaConfig.bos_token_id",description:`<strong>bos_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	Beginning of stream token id.`,name:"bos_token_id"},{anchor:"transformers.LlamaConfig.eos_token_id",description:`<strong>eos_token_id</strong> (<code>int</code>, <em>optional</em>, defaults to 2) —
	End of stream token id.`,name:"eos_token_id"},{anchor:"transformers.LlamaConfig.pretraining_tp",description:`<strong>pretraining_tp</strong> (<code>int</code>, <em>optional</em>, defaults to 1) —
	Experimental feature. Tensor parallelism rank used during pretraining. Please refer to <a href="https://huggingface.co/docs/transformers/main/perf_train_gpu_many#tensor-parallelism" rel="nofollow">this
	document</a> to
	understand more about it. This value is necessary to ensure exact reproducibility of the pretraining
	results. Please refer to <a href="https://github.com/pytorch/pytorch/issues/76232" rel="nofollow">this issue</a>.`,name:"pretraining_tp"},{anchor:"transformers.LlamaConfig.tie_word_embeddings",description:`<strong>tie_word_embeddings</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether to tie weight embeddings`,name:"tie_word_embeddings"},{anchor:"transformers.LlamaConfig.rope_theta",description:`<strong>rope_theta</strong> (<code>float</code>, <em>optional</em>, defaults to 10000.0) —
	The base period of the RoPE embeddings.`,name:"rope_theta"},{anchor:"transformers.LlamaConfig.rope_scaling",description:`<strong>rope_scaling</strong> (<code>Dict</code>, <em>optional</em>) —
	Dictionary containing the scaling configuration for the RoPE embeddings. NOTE: if you apply new rope type
	and you expect the model to work on longer <code>max_position_embeddings</code>, we recommend you to update this value
	accordingly.
	Expected contents:
	<code>rope_type</code> (<code>str</code>):
	The sub-variant of RoPE to use. Can be one of [‘default’, ‘linear’, ‘dynamic’, ‘yarn’, ‘longrope’,
	‘llama3’], with ‘default’ being the original RoPE implementation.
	<code>factor</code> (<code>float</code>, <em>optional</em>):
	Used with all rope types except ‘default’. The scaling factor to apply to the RoPE embeddings. In
	most scaling types, a <code>factor</code> of x will enable the model to handle sequences of length x <em>
	original maximum pre-trained length.
	<code>original_max_position_embeddings</code> (<code>int</code>, </em>optional<em>):
	Used with ‘dynamic’, ‘longrope’ and ‘llama3’. The original max position embeddings used during
	pretraining.
	<code>attention_factor</code> (<code>float</code>, </em>optional<em>):
	Used with ‘yarn’ and ‘longrope’. The scaling factor to be applied on the attention
	computation. If unspecified, it defaults to value recommended by the implementation, using the
	<code>factor</code> field to infer the suggested value.
	<code>beta_fast</code> (<code>float</code>, </em>optional<em>):
	Only used with ‘yarn’. Parameter to set the boundary for extrapolation (only) in the linear
	ramp function. If unspecified, it defaults to 32.
	<code>beta_slow</code> (<code>float</code>, </em>optional<em>):
	Only used with ‘yarn’. Parameter to set the boundary for interpolation (only) in the linear
	ramp function. If unspecified, it defaults to 1.
	<code>short_factor</code> (<code>List[float]</code>, </em>optional<em>):
	Only used with ‘longrope’. The scaling factor to be applied to short contexts (<
	<code>original_max_position_embeddings</code>). Must be a list of numbers with the same length as the hidden
	size divided by the number of attention heads divided by 2
	<code>long_factor</code> (<code>List[float]</code>, </em>optional<em>):
	Only used with ‘longrope’. The scaling factor to be applied to long contexts (<
	<code>original_max_position_embeddings</code>). Must be a list of numbers with the same length as the hidden
	size divided by the number of attention heads divided by 2
	<code>low_freq_factor</code> (<code>float</code>, </em>optional<em>):
	Only used with ‘llama3’. Scaling factor applied to low frequency components of the RoPE
	<code>high_freq_factor</code> (<code>float</code>, </em>optional*):
	Only used with ‘llama3’. Scaling factor applied to high frequency components of the RoPE`,name:"rope_scaling"},{anchor:"transformers.LlamaConfig.attention_bias",description:`<strong>attention_bias</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether to use a bias in the query, key, value and output projection layers during self-attention.`,name:"attention_bias"},{anchor:"transformers.LlamaConfig.attention_dropout",description:`<strong>attention_dropout</strong> (<code>float</code>, <em>optional</em>, defaults to 0.0) —
	The dropout ratio for the attention probabilities.`,name:"attention_dropout"},{anchor:"transformers.LlamaConfig.mlp_bias",description:`<strong>mlp_bias</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether to use a bias in up_proj, down_proj and gate_proj layers in the MLP layers.`,name:"mlp_bias"},{anchor:"transformers.LlamaConfig.head_dim",description:`<strong>head_dim</strong> (<code>int</code>, <em>optional</em>) —
	The attention head dimension. If None, it will default to hidden_size // num_heads`,name:"head_dim"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/configuration_llama.py#L26"}}),V=new zo({props:{anchor:"transformers.LlamaConfig.example",$$slots:{default:[va]},$$scope:{ctx:C}}}),je=new D({props:{title:"LlamaTokenizer",local:"llamatokenizer ][ transformers.LlamaTokenizer",headingTag:"h2"}}),Ie=new q({props:{name:"class transformers.LlamaTokenizer",anchor:"transformers.LlamaTokenizer",parameters:[{name:"vocab_file",val:""},{name:"unk_token",val:" = '<unk>'"},{name:"bos_token",val:" = '<s>'"},{name:"eos_token",val:" = '</s>'"},{name:"pad_token",val:" = None"},{name:"sp_model_kwargs",val:": Optional = None"},{name:"add_bos_token",val:" = True"},{name:"add_eos_token",val:" = False"},{name:"clean_up_tokenization_spaces",val:" = False"},{name:"use_default_system_prompt",val:" = False"},{name:"spaces_between_special_tokens",val:" = False"},{name:"legacy",val:" = None"},{name:"add_prefix_space",val:" = True"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.LlamaTokenizer.vocab_file",description:`<strong>vocab_file</strong> (<code>str</code>) —
	Path to the vocabulary file.`,name:"vocab_file"},{anchor:"transformers.LlamaTokenizer.unk_token",description:`<strong>unk_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"<unk>"</code>) —
	The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
	token instead.`,name:"unk_token"},{anchor:"transformers.LlamaTokenizer.bos_token",description:`<strong>bos_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"<s>"</code>) —
	The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.`,name:"bos_token"},{anchor:"transformers.LlamaTokenizer.eos_token",description:`<strong>eos_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"</s>"</code>) —
	The end of sequence token.`,name:"eos_token"},{anchor:"transformers.LlamaTokenizer.pad_token",description:`<strong>pad_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>) —
	A special token used to make arrays of tokens the same size for batching purpose. Will then be ignored by
	attention mechanisms or loss computation.`,name:"pad_token"},{anchor:"transformers.LlamaTokenizer.sp_model_kwargs",description:`<strong>sp_model_kwargs</strong> (<code>Dict[str, Any]</code>, <code>Optional</code>, <em>optional</em>) —
	Will be passed to the <code>SentencePieceProcessor.__init__()</code> method. The <a href="https://github.com/google/sentencepiece/tree/master/python" rel="nofollow">Python wrapper for
	SentencePiece</a> can be used, among other things,
	to set:</p>
	<ul>
	<li>
	<p><code>enable_sampling</code>: Enable subword regularization.</p>
	</li>
	<li>
	<p><code>nbest_size</code>: Sampling parameters for unigram. Invalid for BPE-Dropout.</p>
	<ul>
	<li><code>nbest_size = {0,1}</code>: No sampling is performed.</li>
	<li><code>nbest_size > 1</code>: samples from the nbest_size results.</li>
	<li><code>nbest_size < 0</code>: assuming that nbest_size is infinite and samples from the all hypothesis (lattice)
	using forward-filtering-and-backward-sampling algorithm.</li>
	</ul>
	</li>
	<li>
	<p><code>alpha</code>: Smoothing parameter for unigram sampling, and dropout probability of merge operations for
	BPE-dropout.</p>
	</li>
	</ul>`,name:"sp_model_kwargs"},{anchor:"transformers.LlamaTokenizer.add_bos_token",description:`<strong>add_bos_token</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to add an <code>bos_token</code> at the start of sequences.`,name:"add_bos_token"},{anchor:"transformers.LlamaTokenizer.add_eos_token",description:`<strong>add_eos_token</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not to add an <code>eos_token</code> at the end of sequences.`,name:"add_eos_token"},{anchor:"transformers.LlamaTokenizer.clean_up_tokenization_spaces",description:`<strong>clean_up_tokenization_spaces</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
	extra spaces.`,name:"clean_up_tokenization_spaces"},{anchor:"transformers.LlamaTokenizer.use_default_system_prompt",description:`<strong>use_default_system_prompt</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not the default system prompt for Llama should be used.`,name:"use_default_system_prompt"},{anchor:"transformers.LlamaTokenizer.spaces_between_special_tokens",description:`<strong>spaces_between_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not to add spaces between special tokens.`,name:"spaces_between_special_tokens"},{anchor:"transformers.LlamaTokenizer.legacy",description:`<strong>legacy</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not the <code>legacy</code> behavior of the tokenizer should be used. Legacy is before the merge of #24622
	and #25224 which includes fixes to properly handle tokens that appear after special tokens.
	Make sure to also set <code>from_slow</code> to <code>True</code>.
	A simple example:</p>
	<ul>
	<li><code>legacy=True</code>:</li>
	</ul>`,name:"legacy"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama.py#L56"}}),Pe=new q({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.LlamaTokenizer.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:""},{name:"token_ids_1",val:" = None"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama.py#L333"}}),We=new q({props:{name:"get_special_tokens_mask",anchor:"transformers.LlamaTokenizer.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.LlamaTokenizer.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) —
	List of IDs.`,name:"token_ids_0"},{anchor:"transformers.LlamaTokenizer.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) —
	Optional second list of IDs for sequence pairs.`,name:"token_ids_1"},{anchor:"transformers.LlamaTokenizer.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama.py#L344",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>List[int]</code></p>
	`}}),He=new q({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.LlamaTokenizer.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.LlamaTokenizer.create_token_type_ids_from_sequences.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) —
	List of ids.`,name:"token_ids_0"},{anchor:"transformers.LlamaTokenizer.create_token_type_ids_from_sequences.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) —
	Optional second list of IDs for sequence pairs.`,name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama.py#L381",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>List of <a href="../glossary#token-type-ids">token type IDs</a> according to the given sequence(s).</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>List[int]</code></p>
	`}}),Y=new zo({props:{anchor:"transformers.LlamaTokenizer.create_token_type_ids_from_sequences.example",$$slots:{default:[ya]},$$scope:{ctx:C}}}),Ue=new q({props:{name:"save_vocabulary",anchor:"transformers.LlamaTokenizer.save_vocabulary",parameters:[{name:"save_directory",val:""},{name:"filename_prefix",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.LlamaTokenizer.save_vocabulary.save_directory",description:`<strong>save_directory</strong> (<code>str</code>) —
	The directory in which to save the vocabulary.`,name:"save_directory"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama.py#L306",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>Paths to the files saved.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>Tuple(str)</code></p>
	`}}),Ae=new D({props:{title:"LlamaTokenizerFast",local:"llamatokenizerfast ][ transformers.LlamaTokenizerFast",headingTag:"h2"}}),Ee=new q({props:{name:"class transformers.LlamaTokenizerFast",anchor:"transformers.LlamaTokenizerFast",parameters:[{name:"vocab_file",val:" = None"},{name:"tokenizer_file",val:" = None"},{name:"clean_up_tokenization_spaces",val:" = False"},{name:"unk_token",val:" = '<unk>'"},{name:"bos_token",val:" = '<s>'"},{name:"eos_token",val:" = '</s>'"},{name:"add_bos_token",val:" = True"},{name:"add_eos_token",val:" = False"},{name:"use_default_system_prompt",val:" = False"},{name:"legacy",val:" = None"},{name:"add_prefix_space",val:" = None"},{name:"**kwargs",val:""}],parametersDescription:[{anchor:"transformers.LlamaTokenizerFast.vocab_file",description:`<strong>vocab_file</strong> (<code>str</code>, <em>optional</em>) —
	<a href="https://github.com/google/sentencepiece" rel="nofollow">SentencePiece</a> file (generally has a .model extension) that
	contains the vocabulary necessary to instantiate a tokenizer.`,name:"vocab_file"},{anchor:"transformers.LlamaTokenizerFast.tokenizer_file",description:`<strong>tokenizer_file</strong> (<code>str</code>, <em>optional</em>) —
	<a href="https://github.com/huggingface/tokenizers" rel="nofollow">tokenizers</a> file (generally has a .json extension) that
	contains everything needed to load the tokenizer.`,name:"tokenizer_file"},{anchor:"transformers.LlamaTokenizerFast.clean_up_tokenization_spaces",description:`<strong>clean_up_tokenization_spaces</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not to cleanup spaces after decoding, cleanup consists in removing potential artifacts like
	extra spaces.`,name:"clean_up_tokenization_spaces"},{anchor:"transformers.LlamaTokenizerFast.unk_token",description:`<strong>unk_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"<unk>"</code>) —
	The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
	token instead.`,name:"unk_token"},{anchor:"transformers.LlamaTokenizerFast.bos_token",description:`<strong>bos_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"<s>"</code>) —
	The beginning of sequence token that was used during pretraining. Can be used a sequence classifier token.`,name:"bos_token"},{anchor:"transformers.LlamaTokenizerFast.eos_token",description:`<strong>eos_token</strong> (<code>str</code> or <code>tokenizers.AddedToken</code>, <em>optional</em>, defaults to <code>"</s>"</code>) —
	The end of sequence token.`,name:"eos_token"},{anchor:"transformers.LlamaTokenizerFast.add_bos_token",description:`<strong>add_bos_token</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>True</code>) —
	Whether or not to add an <code>bos_token</code> at the start of sequences.`,name:"add_bos_token"},{anchor:"transformers.LlamaTokenizerFast.add_eos_token",description:`<strong>add_eos_token</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not to add an <code>eos_token</code> at the end of sequences.`,name:"add_eos_token"},{anchor:"transformers.LlamaTokenizerFast.use_default_system_prompt",description:`<strong>use_default_system_prompt</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not the default system prompt for Llama should be used`,name:"use_default_system_prompt"},{anchor:"transformers.LlamaTokenizerFast.legacy",description:`<strong>legacy</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not the <code>legacy</code> behavior of the tokenizer should be used. Legacy is before the merge of #24622
	and #25224 which includes fixes to properly handle tokens that appear after special tokens.
	Make sure to also set <code>from_slow</code> to <code>True</code>.
	A simple example:</p>
	<ul>
	<li><code>legacy=True</code>:</li>
	</ul>`,name:"legacy"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama_fast.py#L49"}}),Q=new zo({props:{anchor:"transformers.LlamaTokenizerFast.example",$$slots:{default:[La]},$$scope:{ctx:C}}}),Se=new q({props:{name:"build_inputs_with_special_tokens",anchor:"transformers.LlamaTokenizerFast.build_inputs_with_special_tokens",parameters:[{name:"token_ids_0",val:""},{name:"token_ids_1",val:" = None"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama_fast.py#L246"}}),Ne=new q({props:{name:"get_special_tokens_mask",anchor:"transformers.LlamaTokenizerFast.get_special_tokens_mask",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"},{name:"already_has_special_tokens",val:": bool = False"}],parametersDescription:[{anchor:"transformers.LlamaTokenizerFast.get_special_tokens_mask.token_ids_0",description:`<strong>token_ids_0</strong> (<code>List[int]</code>) —
	List of ids of the first sequence.`,name:"token_ids_0"},{anchor:"transformers.LlamaTokenizerFast.get_special_tokens_mask.token_ids_1",description:`<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) —
	List of ids of the second sequence.`,name:"token_ids_1"},{anchor:"transformers.LlamaTokenizerFast.get_special_tokens_mask.already_has_special_tokens",description:`<strong>already_has_special_tokens</strong> (<code>bool</code>, <em>optional</em>, defaults to <code>False</code>) —
	Whether or not the token list is already formatted with special tokens for the model.`,name:"already_has_special_tokens"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/tokenization_utils_base.py#L3945",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>1 for a special token, 0 for a sequence token.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A list of integers in the range [0, 1]</p>
	`}}),Je=new q({props:{name:"create_token_type_ids_from_sequences",anchor:"transformers.LlamaTokenizerFast.create_token_type_ids_from_sequences",parameters:[{name:"token_ids_0",val:": List"},{name:"token_ids_1",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.LlamaTokenizerFast.create_token_type_ids_from_sequences.token_ids_0",description:"<strong>token_ids_0</strong> (<code>List[int]</code>) — The first tokenized sequence.",name:"token_ids_0"},{anchor:"transformers.LlamaTokenizerFast.create_token_type_ids_from_sequences.token_ids_1",description:"<strong>token_ids_1</strong> (<code>List[int]</code>, <em>optional</em>) — The second tokenized sequence.",name:"token_ids_1"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/tokenization_utils_base.py#L3475",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>The token type ids.</p>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>List[int]</code></p>
	`}}),Re=new q({props:{name:"update_post_processor",anchor:"transformers.LlamaTokenizerFast.update_post_processor",parameters:[],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama_fast.py#L181"}}),Ge=new q({props:{name:"save_vocabulary",anchor:"transformers.LlamaTokenizerFast.save_vocabulary",parameters:[{name:"save_directory",val:": str"},{name:"filename_prefix",val:": Optional = None"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/tokenization_llama_fast.py#L225"}}),De=new D({props:{title:"LlamaModel",local:"llamamodel ][ transformers.LlamaModel",headingTag:"h2"}}),Ze=new q({props:{name:"class transformers.LlamaModel",anchor:"transformers.LlamaModel",parameters:[{name:"config",val:": LlamaConfig"}],parametersDescription:[{anchor:"transformers.LlamaModel.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaConfig">LlamaConfig</a>) —
	Model configuration class with all the parameters of the model. Initializing with a config file does not
	load the weights associated with the model, only the configuration. Check out the
	<code>from_pretrained()</code> method to load the model weights.
	config — LlamaConfig`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L877"}}),Be=new q({props:{name:"forward",anchor:"transformers.LlamaModel.forward",parameters:[{name:"input_ids",val:": LongTensor = None"},{name:"attention_mask",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"past_key_values",val:": Union = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"cache_position",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.LlamaModel.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
	it.</p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.LlamaModel.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a></p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p>If <code>past_key_values</code> is used, optionally only the last <code>input_ids</code> have to be input (see
	<code>past_key_values</code>).</p>
	<p>If you want to change padding behavior, you should read <code>modeling_opt._prepare_decoder_attention_mask</code>
	and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more
	information on the default strategy.</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"attention_mask"},{anchor:"transformers.LlamaModel.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.n_positions - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.LlamaModel.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>Cache</code> or <code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>) —
	Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
	blocks) that can be used to speed up sequential decoding. This typically consists in the <code>past_key_values</code>
	returned by the model at a previous stage of decoding, when <code>use_cache=True</code> or <code>config.use_cache=True</code>.</p>
	<p>Two formats are allowed:</p>
	<ul>
	<li>a <code>Cache</code> instance;</li>
	<li>Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of
	shape <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>). This is also known as the legacy
	cache format.</li>
	</ul>
	<p>The model will output the same cache format that is fed as input. If no <code>past_key_values</code> are passed, the
	legacy cache format will be returned.</p>
	<p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>input_ids</code> (those that don’t
	have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all <code>input_ids</code>
	of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.LlamaModel.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.LlamaModel.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) —
	If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see
	<code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.LlamaModel.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.LlamaModel.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.LlamaModel.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.LlamaModel.forward.cache_position",description:`<strong>cache_position</strong> (<code>torch.LongTensor</code> of shape <code>(sequence_length)</code>, <em>optional</em>) —
	Indices depicting the position of the input sequence tokens in the sequence. Contrarily to <code>position_ids</code>,
	this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
	the complete sequence length.`,name:"cache_position"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L911"}}),te=new xo({props:{$$slots:{default:[Ta]},$$scope:{ctx:C}}}),Ve=new D({props:{title:"LlamaForCausalLM",local:"llamaforcausallm ][ transformers.LlamaForCausalLM",headingTag:"h2"}}),Xe=new q({props:{name:"class transformers.LlamaForCausalLM",anchor:"transformers.LlamaForCausalLM",parameters:[{name:"config",val:""}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L1100"}}),Ye=new q({props:{name:"forward",anchor:"transformers.LlamaForCausalLM.forward",parameters:[{name:"input_ids",val:": LongTensor = None"},{name:"attention_mask",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"past_key_values",val:": Union = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"},{name:"cache_position",val:": Optional = None"},{name:"num_logits_to_keep",val:": int = 0"}],parametersDescription:[{anchor:"transformers.LlamaForCausalLM.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
	it.</p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.LlamaForCausalLM.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a></p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p>If <code>past_key_values</code> is used, optionally only the last <code>input_ids</code> have to be input (see
	<code>past_key_values</code>).</p>
	<p>If you want to change padding behavior, you should read <code>modeling_opt._prepare_decoder_attention_mask</code>
	and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more
	information on the default strategy.</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"attention_mask"},{anchor:"transformers.LlamaForCausalLM.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.n_positions - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.LlamaForCausalLM.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>Cache</code> or <code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>) —
	Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
	blocks) that can be used to speed up sequential decoding. This typically consists in the <code>past_key_values</code>
	returned by the model at a previous stage of decoding, when <code>use_cache=True</code> or <code>config.use_cache=True</code>.</p>
	<p>Two formats are allowed:</p>
	<ul>
	<li>a <code>Cache</code> instance;</li>
	<li>Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of
	shape <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>). This is also known as the legacy
	cache format.</li>
	</ul>
	<p>The model will output the same cache format that is fed as input. If no <code>past_key_values</code> are passed, the
	legacy cache format will be returned.</p>
	<p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>input_ids</code> (those that don’t
	have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all <code>input_ids</code>
	of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.LlamaForCausalLM.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.LlamaForCausalLM.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) —
	If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see
	<code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.LlamaForCausalLM.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.LlamaForCausalLM.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.LlamaForCausalLM.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.LlamaForCausalLM.forward.cache_position",description:`<strong>cache_position</strong> (<code>torch.LongTensor</code> of shape <code>(sequence_length)</code>, <em>optional</em>) —
	Indices depicting the position of the input sequence tokens in the sequence. Contrarily to <code>position_ids</code>,
	this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
	the complete sequence length.</p>
	<p>Args —
	labels (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>):
	Labels for computing the masked language modeling loss. Indices should either be in <code>[0, ..., config.vocab_size]</code> or -100 (see <code>input_ids</code> docstring). Tokens with indices set to <code>-100</code> are ignored
	(masked), the loss is only computed for the tokens with labels in <code>[0, ..., config.vocab_size]</code>.</p>
	<p>num_logits_to_keep (<code>int</code>, <em>optional</em>):
	Calculate logits for the last <code>num_logits_to_keep</code> tokens. If <code>0</code>, calculate logits for all
	<code>input_ids</code> (special case). Only last token logits are needed for generation, and calculating them only for that
	token can save memory, which becomes pretty significant for long sequences or large vocabulary size.`,name:"cache_position"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L1130",returnDescription:`<script context="module">export const metadata = 'undefined';<\/script>


	<p>A <code>transformers.modeling_outputs.CausalLMOutputWithPast</code> or a tuple of
	<code>torch.FloatTensor</code> (if <code>return_dict=False</code> is passed or when <code>config.return_dict=False</code>) comprising various
	elements depending on the configuration (<a
	href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaConfig"
	>LlamaConfig</a>) and inputs.</p>
	<ul>
	<li>
	<p><strong>loss</strong> (<code>torch.FloatTensor</code> of shape <code>(1,)</code>, <em>optional</em>, returned when <code>labels</code> is provided) — Language modeling loss (for next-token prediction).</p>
	</li>
	<li>
	<p><strong>logits</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, config.vocab_size)</code>) — Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax).</p>
	</li>
	<li>
	<p><strong>past_key_values</strong> (<code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>, returned when <code>use_cache=True</code> is passed or when <code>config.use_cache=True</code>) — Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of shape
	<code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>)</p>
	<p>Contains pre-computed hidden-states (key and values in the self-attention blocks) that can be used (see
	<code>past_key_values</code> input) to speed up sequential decoding.</p>
	</li>
	<li>
	<p><strong>hidden_states</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_hidden_states=True</code> is passed or when <code>config.output_hidden_states=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for the output of the embeddings, if the model has an embedding layer, +
	one for the output of each layer) of shape <code>(batch_size, sequence_length, hidden_size)</code>.</p>
	<p>Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.</p>
	</li>
	<li>
	<p><strong>attentions</strong> (<code>tuple(torch.FloatTensor)</code>, <em>optional</em>, returned when <code>output_attentions=True</code> is passed or when <code>config.output_attentions=True</code>) — Tuple of <code>torch.FloatTensor</code> (one for each layer) of shape <code>(batch_size, num_heads, sequence_length, sequence_length)</code>.</p>
	<p>Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
	heads.</p>
	</li>
	</ul>
	`,returnType:`<script context="module">export const metadata = 'undefined';<\/script>


	<p><code>transformers.modeling_outputs.CausalLMOutputWithPast</code> or <code>tuple(torch.FloatTensor)</code></p>
	`}}),oe=new xo({props:{$$slots:{default:[$a]},$$scope:{ctx:C}}}),ne=new zo({props:{anchor:"transformers.LlamaForCausalLM.forward.example",$$slots:{default:[wa]},$$scope:{ctx:C}}}),Oe=new D({props:{title:"LlamaForSequenceClassification",local:"llamaforsequenceclassification ][ transformers.LlamaForSequenceClassification",headingTag:"h2"}}),Qe=new q({props:{name:"class transformers.LlamaForSequenceClassification",anchor:"transformers.LlamaForSequenceClassification",parameters:[{name:"config",val:""}],parametersDescription:[{anchor:"transformers.LlamaForSequenceClassification.config",description:`<strong>config</strong> (<a href="/docs/transformers/pr_33174/ko/model_doc/llama2#transformers.LlamaConfig">LlamaConfig</a>) —
	Model configuration class with all the parameters of the model. Initializing with a config file does not
	load the weights associated with the model, only the configuration. Check out the
	<code>from_pretrained()</code> method to load the model weights.`,name:"config"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L1311"}}),Ke=new q({props:{name:"forward",anchor:"transformers.LlamaForSequenceClassification.forward",parameters:[{name:"input_ids",val:": Optional = None"},{name:"attention_mask",val:": Optional = None"},{name:"position_ids",val:": Optional = None"},{name:"past_key_values",val:": Union = None"},{name:"inputs_embeds",val:": Optional = None"},{name:"labels",val:": Optional = None"},{name:"use_cache",val:": Optional = None"},{name:"output_attentions",val:": Optional = None"},{name:"output_hidden_states",val:": Optional = None"},{name:"return_dict",val:": Optional = None"}],parametersDescription:[{anchor:"transformers.LlamaForSequenceClassification.forward.input_ids",description:`<strong>input_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>) —
	Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you provide
	it.</p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p><a href="../glossary#input-ids">What are input IDs?</a>`,name:"input_ids"},{anchor:"transformers.LlamaForSequenceClassification.forward.attention_mask",description:`<strong>attention_mask</strong> (<code>torch.Tensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Mask to avoid performing attention on padding token indices. Mask values selected in <code>[0, 1]</code>:</p>
	<ul>
	<li>1 for tokens that are <strong>not masked</strong>,</li>
	<li>0 for tokens that are <strong>masked</strong>.</li>
	</ul>
	<p><a href="../glossary#attention-mask">What are attention masks?</a></p>
	<p>Indices can be obtained using <code>AutoTokenizer</code>. See <code>PreTrainedTokenizer.encode()</code> and
	<code>PreTrainedTokenizer.__call__()</code> for details.</p>
	<p>If <code>past_key_values</code> is used, optionally only the last <code>input_ids</code> have to be input (see
	<code>past_key_values</code>).</p>
	<p>If you want to change padding behavior, you should read <code>modeling_opt._prepare_decoder_attention_mask</code>
	and modify to your needs. See diagram 1 in <a href="https://arxiv.org/abs/1910.13461" rel="nofollow">the paper</a> for more
	information on the default strategy.</p>
	<ul>
	<li>1 indicates the head is <strong>not masked</strong>,</li>
	<li>0 indicates the head is <strong>masked</strong>.</li>
	</ul>`,name:"attention_mask"},{anchor:"transformers.LlamaForSequenceClassification.forward.position_ids",description:`<strong>position_ids</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size, sequence_length)</code>, <em>optional</em>) —
	Indices of positions of each input sequence tokens in the position embeddings. Selected in the range <code>[0, config.n_positions - 1]</code>.</p>
	<p><a href="../glossary#position-ids">What are position IDs?</a>`,name:"position_ids"},{anchor:"transformers.LlamaForSequenceClassification.forward.past_key_values",description:`<strong>past_key_values</strong> (<code>Cache</code> or <code>tuple(tuple(torch.FloatTensor))</code>, <em>optional</em>) —
	Pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention
	blocks) that can be used to speed up sequential decoding. This typically consists in the <code>past_key_values</code>
	returned by the model at a previous stage of decoding, when <code>use_cache=True</code> or <code>config.use_cache=True</code>.</p>
	<p>Two formats are allowed:</p>
	<ul>
	<li>a <code>Cache</code> instance;</li>
	<li>Tuple of <code>tuple(torch.FloatTensor)</code> of length <code>config.n_layers</code>, with each tuple having 2 tensors of
	shape <code>(batch_size, num_heads, sequence_length, embed_size_per_head)</code>). This is also known as the legacy
	cache format.</li>
	</ul>
	<p>The model will output the same cache format that is fed as input. If no <code>past_key_values</code> are passed, the
	legacy cache format will be returned.</p>
	<p>If <code>past_key_values</code> are used, the user can optionally input only the last <code>input_ids</code> (those that don’t
	have their past key value states given to this model) of shape <code>(batch_size, 1)</code> instead of all <code>input_ids</code>
	of shape <code>(batch_size, sequence_length)</code>.`,name:"past_key_values"},{anchor:"transformers.LlamaForSequenceClassification.forward.inputs_embeds",description:`<strong>inputs_embeds</strong> (<code>torch.FloatTensor</code> of shape <code>(batch_size, sequence_length, hidden_size)</code>, <em>optional</em>) —
	Optionally, instead of passing <code>input_ids</code> you can choose to directly pass an embedded representation. This
	is useful if you want more control over how to convert <code>input_ids</code> indices into associated vectors than the
	model’s internal embedding lookup matrix.`,name:"inputs_embeds"},{anchor:"transformers.LlamaForSequenceClassification.forward.use_cache",description:`<strong>use_cache</strong> (<code>bool</code>, <em>optional</em>) —
	If set to <code>True</code>, <code>past_key_values</code> key value states are returned and can be used to speed up decoding (see
	<code>past_key_values</code>).`,name:"use_cache"},{anchor:"transformers.LlamaForSequenceClassification.forward.output_attentions",description:`<strong>output_attentions</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the attentions tensors of all attention layers. See <code>attentions</code> under returned
	tensors for more detail.`,name:"output_attentions"},{anchor:"transformers.LlamaForSequenceClassification.forward.output_hidden_states",description:`<strong>output_hidden_states</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return the hidden states of all layers. See <code>hidden_states</code> under returned tensors for
	more detail.`,name:"output_hidden_states"},{anchor:"transformers.LlamaForSequenceClassification.forward.return_dict",description:`<strong>return_dict</strong> (<code>bool</code>, <em>optional</em>) —
	Whether or not to return a <code>ModelOutput</code> instead of a plain tuple.`,name:"return_dict"},{anchor:"transformers.LlamaForSequenceClassification.forward.cache_position",description:`<strong>cache_position</strong> (<code>torch.LongTensor</code> of shape <code>(sequence_length)</code>, <em>optional</em>) —
	Indices depicting the position of the input sequence tokens in the sequence. Contrarily to <code>position_ids</code>,
	this tensor is not affected by padding. It is used to update the cache in the correct position and to infer
	the complete sequence length.`,name:"cache_position"},{anchor:"transformers.LlamaForSequenceClassification.forward.labels",description:`<strong>labels</strong> (<code>torch.LongTensor</code> of shape <code>(batch_size,)</code>, <em>optional</em>) —
	Labels for computing the sequence classification/regression loss. Indices should be in <code>[0, ..., config.num_labels - 1]</code>. If <code>config.num_labels == 1</code> a regression loss is computed (Mean-Square loss), If
	<code>config.num_labels > 1</code> a classification loss is computed (Cross-Entropy).`,name:"labels"}],source:"https://github.com/huggingface/transformers/blob/vr_33174/src/transformers/models/llama/modeling_llama.py#L1342"}}),ae=new xo({props:{$$slots:{default:[Ma]},$$scope:{ctx:C}}}),et=new ba({props:{source:"https://github.com/huggingface/transformers/blob/main/docs/source/ko/model_doc/llama2.md"}}),{c(){d=i("meta"),v=n(),c=i("p"),k=n(),p(w.$$.fragment),b=n(),p(T.$$.fragment),ie=n(),$=i("p"),$.innerHTML=Z,Et=n(),le=i("p"),le.textContent=yn,St=n(),de=i("p"),de.innerHTML=Ln,Nt=n(),ce=i("p"),ce.innerHTML=Tn,Jt=n(),p(B.$$.fragment),Rt=n(),me=i("p"),me.textContent=$n,Gt=n(),pe=i("ul"),pe.innerHTML=wn,Dt=n(),p(ue.$$.fragment),Zt=n(),he=i("ul"),he.innerHTML=Mn,Bt=n(),p(fe.$$.fragment),Vt=n(),ge=i("p"),ge.textContent=xn,Xt=n(),_e=i("ul"),_e.innerHTML=zn,Yt=n(),be=i("p"),be.innerHTML=Cn,Ot=n(),p(ke.$$.fragment),Qt=n(),ve=i("p"),ve.textContent=qn,Kt=n(),ye=i("ul"),ye.innerHTML=Fn,eo=n(),p(Le.$$.fragment),to=n(),Te=i("ul"),Te.innerHTML=jn,oo=n(),$e=i("p"),$e.textContent=In,no=n(),we=i("ul"),we.innerHTML=Pn,ao=n(),Me=i("p"),Me.textContent=Wn,so=n(),xe=i("ul"),xe.innerHTML=Hn,ro=n(),ze=i("p"),ze.textContent=Un,io=n(),Ce=i("ul"),Ce.innerHTML=An,lo=n(),p(qe.$$.fragment),co=n(),W=i("div"),p(Fe.$$.fragment),Co=n(),nt=i("p"),nt.innerHTML=En,qo=n(),at=i("p"),at.innerHTML=Sn,Fo=n(),p(V.$$.fragment),mo=n(),p(je.$$.fragment),po=n(),F=i("div"),p(Ie.$$.fragment),jo=n(),st=i("p"),st.textContent=Nn,Io=n(),rt=i("div"),p(Pe.$$.fragment),Po=n(),X=i("div"),p(We.$$.fragment),Wo=n(),it=i("p"),it.innerHTML=Jn,Ho=n(),H=i("div"),p(He.$$.fragment),Uo=n(),lt=i("p"),lt.textContent=Rn,Ao=n(),p(Y.$$.fragment),Eo=n(),dt=i("p"),dt.textContent=Gn,So=n(),O=i("div"),p(Ue.$$.fragment),No=n(),ct=i("p"),ct.textContent=Dn,uo=n(),p(Ae.$$.fragment),ho=n(),y=i("div"),p(Ee.$$.fragment),Jo=n(),mt=i("p"),mt.textContent=Zn,Ro=n(),pt=i("p"),pt.textContent=Bn,Go=n(),p(Q.$$.fragment),Do=n(),ut=i("p"),ut.innerHTML=Vn,Zo=
n(),ht=i("p"),ht.innerHTML=Xn,Bo=n(),ft=i("div"),p(Se.$$.fragment),Vo=n(),K=i("div"),p(Ne.$$.fragment),Xo=n(),gt=i("p"),gt.innerHTML=Yn,Yo=n(),S=i("div"),p(Je.$$.fragment),Oo=n(),_t=i("p"),_t.innerHTML=On,Qo=n(),bt=i("p"),bt.textContent=Qn,Ko=n(),ee=i("div"),p(Re.$$.fragment),en=n(),kt=i("p"),kt.innerHTML=Kn,tn=n(),vt=i("div"),p(Ge.$$.fragment),fo=n(),p(De.$$.fragment),go=n(),j=i("div"),p(Ze.$$.fragment),on=n(),yt=i("p"),yt.innerHTML=ea,nn=n(),Lt=i("p"),Lt.innerHTML=ta,an=n(),Tt=i("p"),Tt.innerHTML=oa,sn=n(),N=i("div"),p(Be.$$.fragment),rn=n(),$t=i("p"),$t.innerHTML=na,ln=n(),p(te.$$.fragment),_o=n(),p(Ve.$$.fragment),bo=n(),G=i("div"),p(Xe.$$.fragment),dn=n(),U=i("div"),p(Ye.$$.fragment),cn=n(),wt=i("p"),wt.innerHTML=aa,mn=n(),p(oe.$$.fragment),pn=n(),p(ne.$$.fragment),ko=n(),p(Oe.$$.fragment),vo=n(),z=i("div"),p(Qe.$$.fragment),un=n(),Mt=i("p"),Mt.textContent=sa,hn=n(),xt=i("p"),xt.innerHTML=ra,fn=n(),zt=i("p"),zt.innerHTML=ia,gn=n(),Ct=i("p"),Ct.innerHTML=la,_n=n(),qt=i("p"),qt.innerHTML=da,bn=n(),J=i("div"),p(Ke.$$.fragment),kn=n(),Ft=i("p"),Ft.innerHTML=ca,vn=n(),p(ae.$$.fragment),yo=n(),p(et.$$.fragment),Lo=n(),Ht=i("p"),this.h()},l(e){const 
t=ga("svelte-u9bgzb",document.head);d=l(t,"META",{name:!0,content:!0}),t.forEach(o),v=a(e),c=l(e,"P",{}),M(c).forEach(o),k=a(e),u(w.$$.fragment,e),b=a(e),u(T.$$.fragment,e),ie=a(e),$=l(e,"P",{"data-svelte-h":!0}),m($)!=="svelte-1mg2hun"&&($.innerHTML=Z),Et=a(e),le=l(e,"P",{"data-svelte-h":!0}),m(le)!=="svelte-e5r8wp"&&(le.textContent=yn),St=a(e),de=l(e,"P",{"data-svelte-h":!0}),m(de)!=="svelte-18r1nwx"&&(de.innerHTML=Ln),Nt=a(e),ce=l(e,"P",{"data-svelte-h":!0}),m(ce)!=="svelte-yxvvyr"&&(ce.innerHTML=Tn),Jt=a(e),u(B.$$.fragment,e),Rt=a(e),me=l(e,"P",{"data-svelte-h":!0}),m(me)!=="svelte-1u4pjic"&&(me.textContent=$n),Gt=a(e),pe=l(e,"UL",{"data-svelte-h":!0}),m(pe)!=="svelte-p2ih1q"&&(pe.innerHTML=wn),Dt=a(e),u(ue.$$.fragment,e),Zt=a(e),he=l(e,"UL",{"data-svelte-h":!0}),m(he)!=="svelte-11p27m1"&&(he.innerHTML=Mn),Bt=a(e),u(fe.$$.fragment,e),Vt=a(e),ge=l(e,"P",{"data-svelte-h":!0}),m(ge)!=="svelte-45syni"&&(ge.textContent=xn),Xt=a(e),_e=l(e,"UL",{"data-svelte-h":!0}),m(_e)!=="svelte-w3e8c3"&&(_e.innerHTML=zn),Yt=a(e),be=l(e,"P",{"data-svelte-h":!0}),m(be)!=="svelte-1yrcdop"&&(be.innerHTML=Cn),Ot=a(e),u(ke.$$.fragment,e),Qt=a(e),ve=l(e,"P",{"data-svelte-h":!0}),m(ve)!=="svelte-109c5g6"&&(ve.textContent=qn),Kt=a(e),ye=l(e,"UL",{"data-svelte-h":!0}),m(ye)!=="svelte-13tfjo2"&&(ye.innerHTML=Fn),eo=a(e),u(Le.$$.fragment,e),to=a(e),Te=l(e,"UL",{"data-svelte-h":!0}),m(Te)!=="svelte-18uwvgq"&&(Te.innerHTML=jn),oo=a(e),$e=l(e,"P",{"data-svelte-h":!0}),m($e)!=="svelte-14a1znp"&&($e.textContent=In),no=a(e),we=l(e,"UL",{"data-svelte-h":!0}),m(we)!=="svelte-10ni5hq"&&(we.innerHTML=Pn),ao=a(e),Me=l(e,"P",{"data-svelte-h":!0}),m(Me)!=="svelte-1x58uo"&&(Me.textContent=Wn),so=a(e),xe=l(e,"UL",{"data-svelte-h":!0}),m(xe)!=="svelte-a0syhq"&&(xe.innerHTML=Hn),ro=a(e),ze=l(e,"P",{"data-svelte-h":!0}),m(ze)!=="svelte-3z2x4b"&&(ze.textContent=Un),io=a(e),Ce=l(e,"UL",{"data-svelte-h":!0}),m(Ce)!=="svelte-jjnuxc"&&(Ce.innerHTML=An),lo=a(e),u(qe.$$.fragment,e),co=a(e),W=l(e,"DIV",{class:!0});var 
A=M(W);u(Fe.$$.fragment,A),Co=a(A),nt=l(A,"P",{"data-svelte-h":!0}),m(nt)!=="svelte-qcdesb"&&(nt.innerHTML=En),qo=a(A),at=l(A,"P",{"data-svelte-h":!0}),m(at)!=="svelte-huu8ef"&&(at.innerHTML=Sn),Fo=a(A),u(V.$$.fragment,A),A.forEach(o),mo=a(e),u(je.$$.fragment,e),po=a(e),F=l(e,"DIV",{class:!0});var I=M(F);u(Ie.$$.fragment,I),jo=a(I),st=l(I,"P",{"data-svelte-h":!0}),m(st)!=="svelte-qfiu5a"&&(st.textContent=Nn),Io=a(I),rt=l(I,"DIV",{class:!0});var Ut=M(rt);u(Pe.$$.fragment,Ut),Ut.forEach(o),Po=a(I),X=l(I,"DIV",{class:!0});var tt=M(X);u(We.$$.fragment,tt),Wo=a(tt),it=l(tt,"P",{"data-svelte-h":!0}),m(it)!=="svelte-1f4f5kp"&&(it.innerHTML=Jn),tt.forEach(o),Ho=a(I),H=l(I,"DIV",{class:!0});var E=M(H);u(He.$$.fragment,E),Uo=a(E),lt=l(E,"P",{"data-svelte-h":!0}),m(lt)!=="svelte-13bfd60"&&(lt.textContent=Rn),Ao=a(E),u(Y.$$.fragment,E),Eo=a(E),dt=l(E,"P",{"data-svelte-h":!0}),m(dt)!=="svelte-wtrslu"&&(dt.textContent=Gn),E.forEach(o),So=a(I),O=l(I,"DIV",{class:!0});var ot=M(O);u(Ue.$$.fragment,ot),No=a(ot),ct=l(ot,"P",{"data-svelte-h":!0}),m(ct)!=="svelte-1slb66l"&&(ct.textContent=Dn),ot.forEach(o),I.forEach(o),uo=a(e),u(Ae.$$.fragment,e),ho=a(e),y=l(e,"DIV",{class:!0});var L=M(y);u(Ee.$$.fragment,L),Jo=a(L),mt=l(L,"P",{"data-svelte-h":!0}),m(mt)!=="svelte-15tdcz8"&&(mt.textContent=Zn),Ro=a(L),pt=l(L,"P",{"data-svelte-h":!0}),m(pt)!=="svelte-llhmpa"&&(pt.textContent=Bn),Go=a(L),u(Q.$$.fragment,L),Do=a(L),ut=l(L,"P",{"data-svelte-h":!0}),m(ut)!=="svelte-cnb6q1"&&(ut.innerHTML=Vn),Zo=a(L),ht=l(L,"P",{"data-svelte-h":!0}),m(ht)!=="svelte-1ndfe3e"&&(ht.innerHTML=Xn),Bo=a(L),ft=l(L,"DIV",{class:!0});var At=M(ft);u(Se.$$.fragment,At),At.forEach(o),Vo=a(L),K=l(L,"DIV",{class:!0});var $o=M(K);u(Ne.$$.fragment,$o),Xo=a($o),gt=l($o,"P",{"data-svelte-h":!0}),m(gt)!=="svelte-1wmjg8a"&&(gt.innerHTML=Yn),$o.forEach(o),Yo=a(L),S=l(L,"DIV",{class:!0});var 
jt=M(S);u(Je.$$.fragment,jt),Oo=a(jt),_t=l(jt,"P",{"data-svelte-h":!0}),m(_t)!=="svelte-zj1vf1"&&(_t.innerHTML=On),Qo=a(jt),bt=l(jt,"P",{"data-svelte-h":!0}),m(bt)!=="svelte-9vptpw"&&(bt.textContent=Qn),jt.forEach(o),Ko=a(L),ee=l(L,"DIV",{class:!0});var wo=M(ee);u(Re.$$.fragment,wo),en=a(wo),kt=l(wo,"P",{"data-svelte-h":!0}),m(kt)!=="svelte-nfci2w"&&(kt.innerHTML=Kn),wo.forEach(o),tn=a(L),vt=l(L,"DIV",{class:!0});var ma=M(vt);u(Ge.$$.fragment,ma),ma.forEach(o),L.forEach(o),fo=a(e),u(De.$$.fragment,e),go=a(e),j=l(e,"DIV",{class:!0});var R=M(j);u(Ze.$$.fragment,R),on=a(R),yt=l(R,"P",{"data-svelte-h":!0}),m(yt)!=="svelte-16gi54p"&&(yt.innerHTML=ea),nn=a(R),Lt=l(R,"P",{"data-svelte-h":!0}),m(Lt)!=="svelte-hswkmf"&&(Lt.innerHTML=ta),an=a(R),Tt=l(R,"P",{"data-svelte-h":!0}),m(Tt)!=="svelte-eom0yk"&&(Tt.innerHTML=oa),sn=a(R),N=l(R,"DIV",{class:!0});var It=M(N);u(Be.$$.fragment,It),rn=a(It),$t=l(It,"P",{"data-svelte-h":!0}),m($t)!=="svelte-lzs74r"&&($t.innerHTML=na),ln=a(It),u(te.$$.fragment,It),It.forEach(o),R.forEach(o),_o=a(e),u(Ve.$$.fragment,e),bo=a(e),G=l(e,"DIV",{class:!0});var Mo=M(G);u(Xe.$$.fragment,Mo),dn=a(Mo),U=l(Mo,"DIV",{class:!0});var se=M(U);u(Ye.$$.fragment,se),cn=a(se),wt=l(se,"P",{"data-svelte-h":!0}),m(wt)!=="svelte-nzhrsr"&&(wt.innerHTML=aa),mn=a(se),u(oe.$$.fragment,se),pn=a(se),u(ne.$$.fragment,se),se.forEach(o),Mo.forEach(o),ko=a(e),u(Oe.$$.fragment,e),vo=a(e),z=l(e,"DIV",{class:!0});var P=M(z);u(Qe.$$.fragment,P),un=a(P),Mt=l(P,"P",{"data-svelte-h":!0}),m(Mt)!=="svelte-62must"&&(Mt.textContent=sa),hn=a(P),xt=l(P,"P",{"data-svelte-h":!0}),m(xt)!=="svelte-28mw80"&&(xt.innerHTML=ra),fn=a(P),zt=l(P,"P",{"data-svelte-h":!0}),m(zt)!=="svelte-10ugs3m"&&(zt.innerHTML=ia),gn=a(P),Ct=l(P,"P",{"data-svelte-h":!0}),m(Ct)!=="svelte-ehy44e"&&(Ct.innerHTML=la),_n=a(P),qt=l(P,"P",{"data-svelte-h":!0}),m(qt)!=="svelte-hswkmf"&&(qt.innerHTML=da),bn=a(P),J=l(P,"DIV",{class:!0});var 
Pt=M(J);u(Ke.$$.fragment,Pt),kn=a(Pt),Ft=l(Pt,"P",{"data-svelte-h":!0}),m(Ft)!=="svelte-1a1l04t"&&(Ft.innerHTML=ca),vn=a(Pt),u(ae.$$.fragment,Pt),Pt.forEach(o),P.forEach(o),yo=a(e),u(et.$$.fragment,e),Lo=a(e),Ht=l(e,"P",{}),M(Ht).forEach(o),this.h()},h(){x(d,"name","hf:doc:metadata"),x(d,"content",za),x(W,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(rt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(X,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(H,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(O,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(F,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(ft,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(K,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(S,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(ee,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(vt,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(y,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(N,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(j,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(U,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(G,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8"),x(J,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 
mt-8"),x(z,"class","docstring border-l-2 border-t-2 pl-4 pt-3.5 border-gray-100 rounded-tl-xl mb-6 mt-8")},m(e,t){s(document.head,d),r(e,v,t),r(e,c,t),r(e,k,t),h(w,e,t),r(e,b,t),h(T,e,t),r(e,ie,t),r(e,$,t),r(e,Et,t),r(e,le,t),r(e,St,t),r(e,de,t),r(e,Nt,t),r(e,ce,t),r(e,Jt,t),h(B,e,t),r(e,Rt,t),r(e,me,t),r(e,Gt,t),r(e,pe,t),r(e,Dt,t),h(ue,e,t),r(e,Zt,t),r(e,he,t),r(e,Bt,t),h(fe,e,t),r(e,Vt,t),r(e,ge,t),r(e,Xt,t),r(e,_e,t),r(e,Yt,t),r(e,be,t),r(e,Ot,t),h(ke,e,t),r(e,Qt,t),r(e,ve,t),r(e,Kt,t),r(e,ye,t),r(e,eo,t),h(Le,e,t),r(e,to,t),r(e,Te,t),r(e,oo,t),r(e,$e,t),r(e,no,t),r(e,we,t),r(e,ao,t),r(e,Me,t),r(e,so,t),r(e,xe,t),r(e,ro,t),r(e,ze,t),r(e,io,t),r(e,Ce,t),r(e,lo,t),h(qe,e,t),r(e,co,t),r(e,W,t),h(Fe,W,null),s(W,Co),s(W,nt),s(W,qo),s(W,at),s(W,Fo),h(V,W,null),r(e,mo,t),h(je,e,t),r(e,po,t),r(e,F,t),h(Ie,F,null),s(F,jo),s(F,st),s(F,Io),s(F,rt),h(Pe,rt,null),s(F,Po),s(F,X),h(We,X,null),s(X,Wo),s(X,it),s(F,Ho),s(F,H),h(He,H,null),s(H,Uo),s(H,lt),s(H,Ao),h(Y,H,null),s(H,Eo),s(H,dt),s(F,So),s(F,O),h(Ue,O,null),s(O,No),s(O,ct),r(e,uo,t),h(Ae,e,t),r(e,ho,t),r(e,y,t),h(Ee,y,null),s(y,Jo),s(y,mt),s(y,Ro),s(y,pt),s(y,Go),h(Q,y,null),s(y,Do),s(y,ut),s(y,Zo),s(y,ht),s(y,Bo),s(y,ft),h(Se,ft,null),s(y,Vo),s(y,K),h(Ne,K,null),s(K,Xo),s(K,gt),s(y,Yo),s(y,S),h(Je,S,null),s(S,Oo),s(S,_t),s(S,Qo),s(S,bt),s(y,Ko),s(y,ee),h(Re,ee,null),s(ee,en),s(ee,kt),s(y,tn),s(y,vt),h(Ge,vt,null),r(e,fo,t),h(De,e,t),r(e,go,t),r(e,j,t),h(Ze,j,null),s(j,on),s(j,yt),s(j,nn),s(j,Lt),s(j,an),s(j,Tt),s(j,sn),s(j,N),h(Be,N,null),s(N,rn),s(N,$t),s(N,ln),h(te,N,null),r(e,_o,t),h(Ve,e,t),r(e,bo,t),r(e,G,t),h(Xe,G,null),s(G,dn),s(G,U),h(Ye,U,null),s(U,cn),s(U,wt),s(U,mn),h(oe,U,null),s(U,pn),h(ne,U,null),r(e,ko,t),h(Oe,e,t),r(e,vo,t),r(e,z,t),h(Qe,z,null),s(z,un),s(z,Mt),s(z,hn),s(z,xt),s(z,fn),s(z,zt),s(z,gn),s(z,Ct),s(z,_n),s(z,qt),s(z,bn),s(z,J),h(Ke,J,null),s(J,kn),s(J,Ft),s(J,vn),h(ae,J,null),r(e,yo,t),h(et,e,t),r(e,Lo,t),r(e,Ht,t),To=!0},p(e,[t]){const A={};t&2&&(A.$$scope={dirty:t,ctx:e}),B.$set(A);const 
I={};t&2&&(I.$$scope={dirty:t,ctx:e}),V.$set(I);const Ut={};t&2&&(Ut.$$scope={dirty:t,ctx:e}),Y.$set(Ut);const tt={};t&2&&(tt.$$scope={dirty:t,ctx:e}),Q.$set(tt);const E={};t&2&&(E.$$scope={dirty:t,ctx:e}),te.$set(E);const ot={};t&2&&(ot.$$scope={dirty:t,ctx:e}),oe.$set(ot);const L={};t&2&&(L.$$scope={dirty:t,ctx:e}),ne.$set(L);const At={};t&2&&(At.$$scope={dirty:t,ctx:e}),ae.$set(At)},i(e){To\|\|(f(w.$$.fragment,e),f(T.$$.fragment,e),f(B.$$.fragment,e),f(ue.$$.fragment,e),f(fe.$$.fragment,e),f(ke.$$.fragment,e),f(Le.$$.fragment,e),f(qe.$$.fragment,e),f(Fe.$$.fragment,e),f(V.$$.fragment,e),f(je.$$.fragment,e),f(Ie.$$.fragment,e),f(Pe.$$.fragment,e),f(We.$$.fragment,e),f(He.$$.fragment,e),f(Y.$$.fragment,e),f(Ue.$$.fragment,e),f(Ae.$$.fragment,e),f(Ee.$$.fragment,e),f(Q.$$.fragment,e),f(Se.$$.fragment,e),f(Ne.$$.fragment,e),f(Je.$$.fragment,e),f(Re.$$.fragment,e),f(Ge.$$.fragment,e),f(De.$$.fragment,e),f(Ze.$$.fragment,e),f(Be.$$.fragment,e),f(te.$$.fragment,e),f(Ve.$$.fragment,e),f(Xe.$$.fragment,e),f(Ye.$$.fragment,e),f(oe.$$.fragment,e),f(ne.$$.fragment,e),f(Oe.$$.fragment,e),f(Qe.$$.fragment,e),f(Ke.$$.fragment,e),f(ae.$$.fragment,e),f(et.$$.fragment,e),To=!0)},o(e){g(w.$$.fragment,e),g(T.$$.fragment,e),g(B.$$.fragment,e),g(ue.$$.fragment,e),g(fe.$$.fragment,e),g(ke.$$.fragment,e),g(Le.$$.fragment,e),g(qe.$$.fragment,e),g(Fe.$$.fragment,e),g(V.$$.fragment,e),g(je.$$.fragment,e),g(Ie.$$.fragment,e),g(Pe.$$.fragment,e),g(We.$$.fragment,e),g(He.$$.fragment,e),g(Y.$$.fragment,e),g(Ue.$$.fragment,e),g(Ae.$$.fragment,e),g(Ee.$$.fragment,e),g(Q.$$.fragment,e),g(Se.$$.fragment,e),g(Ne.$$.fragment,e),g(Je.$$.fragment,e),g(Re.$$.fragment,e),g(Ge.$$.fragment,e),g(De.$$.fragment,e),g(Ze.$$.fragment,e),g(Be.$$.fragment,e),g(te.$$.fragment,e),g(Ve.$$.fragment,e),g(Xe.$$.fragment,e),g(Ye.$$.fragment,e),g(oe.$$.fragment,e),g(ne.$$.fragment,e),g(Oe.$$.fragment,e),g(Qe.$$.fragment,e),g(Ke.$$.fragment,e),g(ae.$$.fragment,e),g(et.$$.fragment,e),To=!1},d(e){e&&(o(v),o(c),o(k),o(b),o(
ie),o($),o(Et),o(le),o(St),o(de),o(Nt),o(ce),o(Jt),o(Rt),o(me),o(Gt),o(pe),o(Dt),o(Zt),o(he),o(Bt),o(Vt),o(ge),o(Xt),o(_e),o(Yt),o(be),o(Ot),o(Qt),o(ve),o(Kt),o(ye),o(eo),o(to),o(Te),o(oo),o($e),o(no),o(we),o(ao),o(Me),o(so),o(xe),o(ro),o(ze),o(io),o(Ce),o(lo),o(co),o(W),o(mo),o(po),o(F),o(uo),o(ho),o(y),o(fo),o(go),o(j),o(_o),o(bo),o(G),o(ko),o(vo),o(z),o(yo),o(Lo),o(Ht)),o(d),_(w,e),_(T,e),_(B,e),_(ue,e),_(fe,e),_(ke,e),_(Le,e),_(qe,e),_(Fe),_(V),_(je,e),_(Ie),_(Pe),_(We),_(He),_(Y),_(Ue),_(Ae,e),_(Ee),_(Q),_(Se),_(Ne),_(Je),_(Re),_(Ge),_(De,e),_(Ze),_(Be),_(te),_(Ve,e),_(Xe),_(Ye),_(oe),_(ne),_(Oe,e),_(Qe),_(Ke),_(ae),_(et,e)}}}
/**
 * Page metadata as a JSON string: page title, its `local` anchor, and a
 * depth-2 entry (title / local / sections) for each heading on the page.
 * The fragment's h() hook passes this string as the "content" value of the
 * element it tags with name "hf:doc:metadata".
 *
 * NOTE(review): several `local` values contain " ][ " (for example
 * "llamaconfig ][ transformers.LlamaConfig"). This looks like a mangled
 * heading anchor coming from the source markdown. The same strings are
 * passed as the `local` prop of the heading components in this file, so
 * confirm and fix upstream rather than editing this generated string alone.
 */
const za='{"title":"Llama2","local":"llama2","sections":[{"title":"개요","local":"overview","sections":[],"depth":2},{"title":"리소스","local":"resources","sections":[],"depth":2},{"title":"LlamaConfig","local":"llamaconfig ][ transformers.LlamaConfig","sections":[],"depth":2},{"title":"LlamaTokenizer","local":"llamatokenizer ][ transformers.LlamaTokenizer","sections":[],"depth":2},{"title":"LlamaTokenizerFast","local":"llamatokenizerfast ][ transformers.LlamaTokenizerFast","sections":[],"depth":2},{"title":"LlamaModel","local":"llamamodel ][ transformers.LlamaModel","sections":[],"depth":2},{"title":"LlamaForCausalLM","local":"llamaforcausallm ][ transformers.LlamaForCausalLM","sections":[],"depth":2},{"title":"LlamaForSequenceClassification","local":"llamaforsequenceclassification ][ transformers.LlamaForSequenceClassification","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to `fa` by the component constructor below.
 *
 * Registers a callback through `ua` that reads the "fw" query parameter of
 * the current page URL; the value read is discarded. Returns an empty array.
 * The `C` parameter is not used.
 *
 * `ua` is the scheduler chunk's export `o` — presumably Svelte's onMount;
 * confirm against that chunk.
 */
function Ca(C){return ua(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component class. The constructor calls `fa` with this instance, the
 * received options `d`, `Ca`, `xa`, `pa`, and an empty object.
 *
 * `xa` is defined earlier in this file (outside this excerpt) — presumably
 * the fragment factory. `ha`, `fa` and `pa` are imports from the shared
 * chunks — presumably SvelteComponent, init and safe_not_equal; confirm.
 */
class Ua extends ha{constructor(d){super(),fa(this,d,Ca,xa,pa,{})}}
// The class is exported under the name `component`.
export{Ua as component};

Xet Storage Details

Size: 97.6 kB
Xet hash: f82b214914f9e594374a8607d3b2a71d73fd6aefc663ca880672570350dd358d

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.