Buckets:

rtrm's picture
download
raw
88.9 kB
import{s as Jn,o as yn,n as wn}from"../chunks/scheduler.56730f09.js";import{S as Tn,i as un,g as p,s as t,r as j,A as Cn,h as i,f as a,c as e,j as dn,u as r,x as c,k as fa,y as bn,a as n,v as M,d as h,t as o,w as x}from"../chunks/index.1f144517.js";import{T as fn}from"../chunks/Tip.41e845e5.js";import{C as g}from"../chunks/CodeBlock.738eeccb.js";import{D as In}from"../chunks/DocNotebookDropdown.b2e82107.js";import{H as C}from"../chunks/Heading.57d46534.js";function _n(Os){let U,b="이 튜토리얼에서 설명하는 태스크는 다음과 같은 모델 아키텍처에서 지원됩니다:",y,d,w='<a href="../model_doc/layoutlm">LayoutLM</a>, <a href="../model_doc/layoutlmv2">LayoutLMv2</a>, <a href="../model_doc/layoutlmv3">LayoutLMv3</a>';return{c(){U=p("p"),U.textContent=b,y=t(),d=p("p"),d.innerHTML=w},l(m){U=i(m,"P",{"data-svelte-h":!0}),c(U)!=="svelte-6xt7gu"&&(U.textContent=b),y=e(m),d=i(m,"P",{"data-svelte-h":!0}),c(d)!=="svelte-4nltzx"&&(d.innerHTML=w)},m(m,J){n(m,U,J),n(m,y,J),n(m,d,J)},p:wn,d(m){m&&(a(U),a(y),a(d))}}}function Vn(Os){let U,b,y,d,w,m,J,Ps,f,Ia=`문서 시각적 질의 응답(Document Visual Question Answering)이라고도 하는
문서 질의 응답(Document Question Answering)은 문서 이미지에 대한 질문에 답변을 주는 태스크입니다.
이 태스크를 지원하는 모델의 입력은 일반적으로 이미지와 질문의 조합이고, 출력은 자연어로 된 답변입니다. 이러한 모델은 텍스트, 단어의 위치(바운딩 박스), 이미지 등 다양한 모달리티를 활용합니다.`,Ks,I,_a="이 가이드는 다음 내용을 설명합니다:",sl,_,Va='<li><a href="https://huggingface.co/datasets/nielsr/docvqa_1200_examples_donut" rel="nofollow">DocVQA dataset</a>을 사용해 <a href="../model_doc/layoutlmv2">LayoutLMv2</a> 미세 조정하기</li> <li>추론을 위해 미세 조정된 모델을 사용하기</li>',ll,T,al,V,Ra=`LayoutLMv2는 토큰의 마지막 은닉층 위에 질의 응답 헤드를 추가해 답변의 시작 토큰과 끝 토큰의 위치를 예측함으로써 문서 질의 응답 태스크를 해결합니다. 즉, 문맥이 주어졌을 때 질문에 답하는 정보를 추출하는 추출형 질의 응답(Extractive question answering)으로 문제를 처리합니다.
문맥은 OCR 엔진의 출력에서 가져오며, 여기서는 Google의 Tesseract를 사용합니다.`,nl,R,ka="시작하기 전에 필요한 라이브러리가 모두 설치되어 있는지 확인하세요. LayoutLMv2는 detectron2, torchvision 및 테서랙트를 필요로 합니다.",tl,k,el,Z,pl,Q,il,A,Za="필요한 라이브러리들을 모두 설치한 후 런타임을 다시 시작합니다.",cl,$,Qa=`커뮤니티에 당신의 모델을 공유하는 것을 권장합니다. Hugging Face 계정에 로그인해서 모델을 🤗 Hub에 업로드하세요.
프롬프트가 실행되면, 로그인을 위해 토큰을 입력하세요:`,jl,X,rl,v,Aa="몇 가지 전역 변수를 정의해 보겠습니다.",Ml,N,hl,G,ol,E,$a=`이 가이드에서는 🤗 Hub에서 찾을 수 있는 전처리된 DocVQA의 작은 샘플을 사용합니다.
DocVQA의 전체 데이터 세트를 사용하고 싶다면, <a href="https://rrc.cvc.uab.es/?ch=17" rel="nofollow">DocVQA homepage</a>에 가입 후 다운로드 할 수 있습니다. 전체 데이터 세트를 다운로드 했다면, 이 가이드를 계속 진행하기 위해 <a href="https://huggingface.co/docs/datasets/loading#local-and-remote-files" rel="nofollow">🤗 dataset에 파일을 가져오는 방법</a>을 확인하세요.`,xl,B,gl,F,Xa="보시다시피, 데이터 세트는 이미 훈련 세트와 테스트 세트로 나누어져 있습니다. 무작위로 예제를 살펴보면서 특성을 확인해보세요.",Ul,W,ml,z,va="각 필드가 나타내는 내용은 다음과 같습니다:",dl,q,Na="<li><code>id</code>: 예제의 id</li> <li><code>image</code>: 문서 이미지를 포함하는 PIL.Image.Image 객체</li> <li><code>query</code>: 질문 문자열 - 여러 언어의 자연어로 된 질문</li> <li><code>answers</code>: 사람이 주석을 단 정답 리스트</li> <li><code>words</code> and <code>bounding_boxes</code>: OCR의 결과값들이며 이 가이드에서는 사용하지 않을 예정</li> <li><code>answer</code>: 다른 모델과 일치하는 답변이며 이 가이드에서는 사용하지 않을 예정</li>",Jl,S,Ga=`영어로 된 질문만 남기고 다른 모델에 대한 예측을 포함하는 <code>answer</code> 특성을 삭제하겠습니다.
그리고 주석 작성자가 제공한 데이터 세트에서 첫 번째 답변을 가져옵니다. 또는 무작위로 샘플을 추출할 수도 있습니다.`,yl,Y,wl,H,Ea=`이 가이드에서 사용하는 LayoutLMv2 체크포인트는 <code>max_position_embeddings = 512</code>로 훈련되었습니다(이 정보는 <a href="https://huggingface.co/microsoft/layoutlmv2-base-uncased/blob/main/config.json#L18" rel="nofollow">체크포인트의 <code>config.json</code> 파일</a>에서 확인할 수 있습니다).
바로 예제를 잘라낼 수도 있지만, 긴 문서의 끝에 답변이 있어 잘리는 상황을 피하기 위해 여기서는 임베딩이 512보다 길어질 가능성이 있는 몇 가지 예제를 제거하겠습니다.
데이터 세트에 있는 대부분의 문서가 긴 경우 슬라이딩 윈도우 방법을 사용할 수 있습니다 - 자세한 내용을 확인하고 싶으면 이 <a href="https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb" rel="nofollow">노트북</a>을 확인하세요.`,Tl,D,ul,L,Ba=`이 시점에서 이 데이터 세트의 OCR 특성도 제거해 보겠습니다. OCR 특성은 다른 모델을 미세 조정하기 위한 것으로, 이 가이드에서 사용하는 모델의 입력 요구 사항과 일치하지 않기 때문에 이 특성을 사용하기 위해서는 일부 처리가 필요합니다.
대신, 원본 데이터에 <code>LayoutLMv2Processor</code>를 사용하여 OCR 및 토큰화를 모두 수행할 수 있습니다.
이렇게 하면 모델이 요구하는 입력을 얻을 수 있습니다.
이미지를 수동으로 처리하려면, <a href="../model_doc/layoutlmv2"><code>LayoutLMv2</code> model documentation</a>에서 모델이 요구하는 입력 포맷을 확인해보세요.`,Cl,O,bl,P,Fa="마지막으로, 데이터 탐색을 완료하기 위해 이미지 예시를 살펴봅시다.",fl,K,Il,u,Wa='<img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/docvqa_example.jpg" alt="DocVQA Image Example"/>',_l,ss,Vl,ls,za=`문서 질의 응답 태스크는 멀티모달 태스크이며, 각 모달리티의 입력이 모델의 요구에 맞게 전처리 되었는지 확인해야 합니다.
이미지 데이터를 처리할 수 있는 이미지 프로세서와 텍스트 데이터를 인코딩할 수 있는 토크나이저를 결합한 <code>LayoutLMv2Processor</code>를 가져오는 것부터 시작해 보겠습니다.`,Rl,as,kl,ns,Zl,ts,qa=`먼저, 프로세서의 <code>image_processor</code>를 사용해 모델에 대한 문서 이미지를 준비해 보겠습니다.
기본값으로, 이미지 프로세서는 이미지 크기를 224x224로 조정하고 색상 채널의 순서가 올바른지 확인한 후 단어와 정규화된 바운딩 박스를 얻기 위해 테서랙트를 사용해 OCR를 적용합니다.
이 튜토리얼에서 우리가 필요한 것과 기본값은 완전히 동일합니다. 이미지 배치에 기본 이미지 처리를 적용하고 OCR의 결과를 변환하는 함수를 작성합니다.`,Ql,es,Al,ps,Sa="이 전처리를 데이터 세트 전체에 빠르게 적용하려면 <code>map</code>를 사용하세요.",$l,is,Xl,cs,vl,js,Ya=`이미지에 OCR을 적용했으면 데이터 세트의 텍스트 부분을 모델에 맞게 인코딩해야 합니다.
이 인코딩에는 이전 단계에서 가져온 단어와 박스를 토큰 수준의 <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code> 및 <code>bbox</code>로 변환하는 작업이 포함됩니다.
텍스트를 전처리하려면 프로세서의 <code>tokenizer</code>가 필요합니다.`,Nl,rs,Gl,Ms,Ha="위에서 언급한 전처리 외에도 모델을 위해 레이블을 추가해야 합니다. 🤗 Transformers의 <code>xxxForQuestionAnswering</code> 모델의 경우, 레이블은 <code>start_positions</code>와 <code>end_positions</code>로 구성되며 어떤 토큰이 답변의 시작과 끝에 있는지를 나타냅니다.",El,hs,Da="레이블 추가를 위해서, 먼저 더 큰 리스트(단어 리스트)에서 하위 리스트(단어로 분할된 답변)을 찾을 수 있는 헬퍼 함수를 정의합니다.",Bl,os,La=`이 함수는 <code>words_list</code>와 <code>answer_list</code>, 이렇게 두 리스트를 입력으로 받습니다.
그런 다음 <code>words_list</code>를 반복하여 <code>words_list</code>의 현재 단어(words_list[i])가 <code>answer_list</code>의 첫 번째 단어(answer_list[0])와 같은지,
현재 단어에서 시작해 <code>answer_list</code>와 같은 길이만큼의 <code>words_list</code>의 하위 리스트가 <code>answer_list</code>와 일치하는지 확인합니다.
이 조건이 참이라면 일치하는 항목을 발견했음을 의미하며, 함수는 일치 항목, 시작 인덱스(idx) 및 종료 인덱스(idx + len(answer_list) - 1)를 기록합니다. 일치하는 항목이 두 개 이상 발견되면 함수는 첫 번째 항목만 반환합니다. 일치하는 항목이 없다면 함수는 (<code>None</code>, 0, 0)을 반환합니다.`,Fl,xs,Wl,gs,Oa="이 함수가 어떻게 정답의 위치를 찾는지 설명하기 위해 다음 예제에서 함수를 사용해 보겠습니다:",zl,Us,ql,ms,Pa="한편, 위 예제가 인코딩되면 다음과 같이 표시됩니다:",Sl,ds,Yl,Js,Ka="이제 인코딩된 입력에서 정답의 위치를 찾아야 합니다.",Hl,ys,sn="<li><code>token_type_ids</code>는 어떤 토큰이 질문에 속하는지, 그리고 어떤 토큰이 문서의 단어에 포함되는지를 알려줍니다.</li> <li><code>tokenizer.cls_token_id</code> 입력의 시작 부분에 있는 특수 토큰을 찾는 데 도움을 줍니다.</li> <li><code>word_ids</code>는 원본 <code>words</code>에서 찾은 답변을 전체 인코딩된 입력의 동일한 답과 일치시키고 인코딩된 입력에서 답변의 시작/끝 위치를 결정합니다.</li>",Dl,ws,ln="위 내용들을 염두에 두고 데이터 세트 예제의 배치를 인코딩하는 함수를 만들어 보겠습니다:",Ll,Ts,Ol,us,an="이제 이 전처리 함수가 있으니 전체 데이터 세트를 인코딩할 수 있습니다:",Pl,Cs,Kl,bs,nn="인코딩된 데이터 세트의 특성이 어떻게 생겼는지 확인해 보겠습니다:",sa,fs,la,Is,aa,_s,tn=`문서 질의 응답을 평가하려면 상당한 양의 후처리가 필요합니다. 시간이 너무 많이 걸리지 않도록 이 가이드에서는 평가 단계를 생략합니다.
<code>Trainer</code>가 훈련 과정에서 평가 손실(evaluation loss)을 계속 계산하기 때문에 모델의 성능을 대략적으로 알 수 있습니다.
추출적(Extractive) 질의 응답은 보통 F1/exact match 방법을 사용해 평가됩니다.
직접 구현해보고 싶으시다면, Hugging Face course의 <a href="https://huggingface.co/course/chapter7/7?fw=pt#postprocessing" rel="nofollow">Question Answering chapter</a>을 참고하세요.`,na,Vs,ta,Rs,en=`축하합니다! 이 가이드의 가장 어려운 부분을 성공적으로 처리했으니 이제 나만의 모델을 훈련할 준비가 되었습니다.
훈련은 다음과 같은 단계로 이루어져 있습니다:`,ea,ks,pn="<li>전처리에서의 동일한 체크포인트를 사용하기 위해 <code>AutoModelForDocumentQuestionAnswering</code>으로 모델을 가져옵니다.</li> <li><code>TrainingArguments</code>로 훈련 하이퍼파라미터를 정합니다.</li> <li>예제를 배치 처리하는 함수를 정의합니다. 여기서는 <code>DefaultDataCollator</code>가 적당합니다.</li> <li>모델, 데이터 세트, 데이터 콜레이터(Data collator)와 함께 <code>Trainer</code>에 훈련 인수들을 전달합니다.</li> <li><code>train()</code>을 호출해서 모델을 미세 조정합니다.</li>",pa,Zs,ia,Qs,cn=`<code>TrainingArguments</code>에서 <code>output_dir</code>을 사용하여 모델을 저장할 위치를 지정하고, 적절한 하이퍼파라미터를 설정합니다.
모델을 커뮤니티와 공유하려면 <code>push_to_hub</code>를 <code>True</code>로 설정하세요 (모델을 업로드하려면 Hugging Face에 로그인해야 합니다).
이 경우 <code>output_dir</code>은 모델의 체크포인트를 푸시할 레포지토리의 이름이 됩니다.`,ca,As,ja,$s,jn="간단한 데이터 콜레이터를 정의하여 예제를 함께 배치합니다.",ra,Xs,Ma,vs,rn="마지막으로, 모든 것을 한 곳에 모아 <code>train()</code>을 호출합니다:",ha,Ns,oa,Gs,Mn="최종 모델을 🤗 Hub에 추가하려면, 모델 카드를 생성하고 <code>push_to_hub</code>를 호출합니다:",xa,Es,ga,Bs,Ua,Fs,hn=`이제 LayoutLMv2 모델을 미세 조정하고 🤗 Hub에 업로드했으니 추론에도 사용할 수 있습니다.
추론을 위해 미세 조정된 모델을 사용해 보는 가장 간단한 방법은 <code>Pipeline</code>을 사용하는 것 입니다.`,ma,Ws,on="예를 들어 보겠습니다:",da,zs,Ja,qs,xn="그 다음, 모델로 문서 질의 응답을 하기 위해 파이프라인을 인스턴스화하고 이미지 + 질문 조합을 전달합니다.",ya,Ss,wa,Ys,gn="원한다면 파이프라인의 결과를 수동으로 복제할 수도 있습니다:",Ta,Hs,Un="<li>이미지와 질문을 가져와 모델의 프로세서를 사용해 모델에 맞게 준비합니다.</li> <li>모델을 통해 결과 또는 전처리를 전달합니다.</li> <li>모델은 어떤 토큰이 답변의 시작에 있는지, 어떤 토큰이 답변이 끝에 있는지를 나타내는 <code>start_logits</code>와 <code>end_logits</code>를 반환합니다. 둘 다 (batch_size, sequence_length) 형태를 갖습니다.</li> <li><code>start_logits</code>와 <code>end_logits</code>의 마지막 차원을 최대로 만드는 값을 찾아 예상 <code>start_idx</code>와 <code>end_idx</code>를 얻습니다.</li> <li>토크나이저로 답변을 디코딩합니다.</li>",ua,Ds,Ca,Ls,ba;return w=new C({props:{title:"문서 질의 응답(Document Question Answering)",local:"document_question_answering",headingTag:"h1"}}),J=new In({props:{classNames:"absolute z-10 right-0 top-0",options:[{label:"Mixed",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/ko/document_question_answering.ipynb"},{label:"PyTorch",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/ko/pytorch/document_question_answering.ipynb"},{label:"TensorFlow",value:"https://colab.research.google.com/github/huggingface/notebooks/blob/main/transformers_doc/ko/tensorflow/document_question_answering.ipynb"},{label:"Mixed",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/ko/document_question_answering.ipynb"},{label:"PyTorch",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/ko/pytorch/document_question_answering.ipynb"},{label:"TensorFlow",value:"https://studiolab.sagemaker.aws/import/github/huggingface/notebooks/blob/main/transformers_doc/ko/tensorflow/document_question_answering.ipynb"}]}}),T=new fn({props:{$$slots:{default:[_n]},$$scope:{ctx:Os}}}),k=new g({props:{code:"cGlwJTIwaW5zdGFsbCUyMC1xJTIwdHJhbnNmb3JtZXJzJTIwZGF0YXNldHM=",highlighted:"pip install 
-q transformers datasets",wrap:!1}}),Z=new g({props:{code:"cGlwJTIwaW5zdGFsbCUyMCdnaXQlMkJodHRwcyUzQSUyRiUyRmdpdGh1Yi5jb20lMkZmYWNlYm9va3Jlc2VhcmNoJTJGZGV0ZWN0cm9uMi5naXQnJTBBcGlwJTIwaW5zdGFsbCUyMHRvcmNodmlzaW9u",highlighted:`pip install <span class="hljs-string">&#x27;git+https://github.com/facebookresearch/detectron2.git&#x27;</span>
pip install torchvision`,wrap:!1}}),Q=new g({props:{code:"c3VkbyUyMGFwdCUyMGluc3RhbGwlMjB0ZXNzZXJhY3Qtb2NyJTBBcGlwJTIwaW5zdGFsbCUyMC1xJTIwcHl0ZXNzZXJhY3Q=",highlighted:`sudo apt install tesseract-ocr
pip install -q pytesseract`,wrap:!1}}),X=new g({props:{code:"ZnJvbSUyMGh1Z2dpbmdmYWNlX2h1YiUyMGltcG9ydCUyMG5vdGVib29rX2xvZ2luJTBBJTBBbm90ZWJvb2tfbG9naW4oKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
<span class="hljs-meta">&gt;&gt;&gt; </span>notebook_login()`,wrap:!1}}),N=new g({props:{code:"bW9kZWxfY2hlY2twb2ludCUyMCUzRCUyMCUyMm1pY3Jvc29mdCUyRmxheW91dGxtdjItYmFzZS11bmNhc2VkJTIyJTBBYmF0Y2hfc2l6ZSUyMCUzRCUyMDQ=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>model_checkpoint = <span class="hljs-string">&quot;microsoft/layoutlmv2-base-uncased&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>batch_size = <span class="hljs-number">4</span>`,wrap:!1}}),G=new C({props:{title:"데이터 불러오기",local:"load-the-data",headingTag:"h2"}}),B=new g({props:{code:"ZnJvbSUyMGRhdGFzZXRzJTIwaW1wb3J0JTIwbG9hZF9kYXRhc2V0JTBBJTBBZGF0YXNldCUyMCUzRCUyMGxvYWRfZGF0YXNldCglMjJuaWVsc3IlMkZkb2N2cWFfMTIwMF9leGFtcGxlcyUyMiklMEFkYXRhc2V0",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;nielsr/docvqa_1200_examples&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset
DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;query&#x27;</span>, <span class="hljs-string">&#x27;answers&#x27;</span>, <span class="hljs-string">&#x27;words&#x27;</span>, <span class="hljs-string">&#x27;bounding_boxes&#x27;</span>, <span class="hljs-string">&#x27;answer&#x27;</span>],
num_rows: <span class="hljs-number">1000</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;query&#x27;</span>, <span class="hljs-string">&#x27;answers&#x27;</span>, <span class="hljs-string">&#x27;words&#x27;</span>, <span class="hljs-string">&#x27;bounding_boxes&#x27;</span>, <span class="hljs-string">&#x27;answer&#x27;</span>],
num_rows: <span class="hljs-number">200</span>
})
})`,wrap:!1}}),W=new g({props:{code:"ZGF0YXNldCU1QiUyMnRyYWluJTIyJTVELmZlYXR1cmVz",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-string">&quot;train&quot;</span>].features',wrap:!1}}),Y=new g({props:{code:"dXBkYXRlZF9kYXRhc2V0JTIwJTNEJTIwZGF0YXNldC5tYXAobGFtYmRhJTIwZXhhbXBsZSUzQSUyMCU3QiUyMnF1ZXN0aW9uJTIyJTNBJTIwZXhhbXBsZSU1QiUyMnF1ZXJ5JTIyJTVEJTVCJTIyZW4lMjIlNUQlN0QlMkMlMjByZW1vdmVfY29sdW1ucyUzRCU1QiUyMnF1ZXJ5JTIyJTVEKSUwQXVwZGF0ZWRfZGF0YXNldCUyMCUzRCUyMHVwZGF0ZWRfZGF0YXNldC5tYXAoJTBBJTIwJTIwJTIwJTIwbGFtYmRhJTIwZXhhbXBsZSUzQSUyMCU3QiUyMmFuc3dlciUyMiUzQSUyMGV4YW1wbGUlNUIlMjJhbnN3ZXJzJTIyJTVEJTVCMCU1RCU3RCUyQyUyMHJlbW92ZV9jb2x1bW5zJTNEJTVCJTIyYW5zd2VyJTIyJTJDJTIwJTIyYW5zd2VycyUyMiU1RCUwQSk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = dataset.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> example: {<span class="hljs-string">&quot;question&quot;</span>: example[<span class="hljs-string">&quot;query&quot;</span>][<span class="hljs-string">&quot;en&quot;</span>]}, remove_columns=[<span class="hljs-string">&quot;query&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> <span class="hljs-keyword">lambda</span> example: {<span class="hljs-string">&quot;answer&quot;</span>: example[<span class="hljs-string">&quot;answers&quot;</span>][<span class="hljs-number">0</span>]}, remove_columns=[<span class="hljs-string">&quot;answer&quot;</span>, <span class="hljs-string">&quot;answers&quot;</span>]
<span class="hljs-meta">... </span>)`,wrap:!1}}),D=new g({props:{code:"dXBkYXRlZF9kYXRhc2V0JTIwJTNEJTIwdXBkYXRlZF9kYXRhc2V0LmZpbHRlcihsYW1iZGElMjB4JTNBJTIwbGVuKHglNUIlMjJ3b3JkcyUyMiU1RCklMjAlMkIlMjBsZW4oeCU1QiUyMnF1ZXN0aW9uJTIyJTVELnNwbGl0KCkpJTIwJTNDJTIwNTEyKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;words&quot;</span>]) + <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;question&quot;</span>].split()) &lt; <span class="hljs-number">512</span>)',wrap:!1}}),O=new g({props:{code:"dXBkYXRlZF9kYXRhc2V0JTIwJTNEJTIwdXBkYXRlZF9kYXRhc2V0LnJlbW92ZV9jb2x1bW5zKCUyMndvcmRzJTIyKSUwQXVwZGF0ZWRfZGF0YXNldCUyMCUzRCUyMHVwZGF0ZWRfZGF0YXNldC5yZW1vdmVfY29sdW1ucyglMjJib3VuZGluZ19ib3hlcyUyMik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.remove_columns(<span class="hljs-string">&quot;words&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.remove_columns(<span class="hljs-string">&quot;bounding_boxes&quot;</span>)`,wrap:!1}}),K=new g({props:{code:"dXBkYXRlZF9kYXRhc2V0JTVCJTIydHJhaW4lMjIlNUQlNUIxMSU1RCU1QiUyMmltYWdlJTIyJTVE",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">11</span>][<span class="hljs-string">&quot;image&quot;</span>]',wrap:!1}}),ss=new C({props:{title:"데이터 전처리",local:"preprocess-the-data",headingTag:"h2"}}),as=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Qcm9jZXNzb3IlMEElMEFwcm9jZXNzb3IlMjAlM0QlMjBBdXRvUHJvY2Vzc29yLmZyb21fcHJldHJhaW5lZChtb2RlbF9jaGVja3BvaW50KQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(model_checkpoint)`,wrap:!1}}),ns=new C({props:{title:"문서 이미지 전처리",local:"preprocessing-document-images",headingTag:"h3"}}),es=new g({props:{code:"aW1hZ2VfcHJvY2Vzc29yJTIwJTNEJTIwcHJvY2Vzc29yLmltYWdlX3Byb2Nlc3NvciUwQSUwQSUwQWRlZiUyMGdldF9vY3Jfd29yZHNfYW5kX2JveGVzKGV4YW1wbGVzKSUzQSUwQSUyMCUyMCUyMCUyMGltYWdlcyUyMCUzRCUyMCU1QmltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKSUyMGZvciUyMGltYWdlJTIwaW4lMjBleGFtcGxlcyU1QiUyMmltYWdlJTIyJTVEJTVEJTBBJTIwJTIwJTIwJTIwZW5jb2RlZF9pbnB1dHMlMjAlM0QlMjBpbWFnZV9wcm9jZXNzb3IoaW1hZ2VzKSUwQSUwQSUyMCUyMCUyMCUyMGV4YW1wbGVzJTVCJTIyaW1hZ2UlMjIlNUQlMjAlM0QlMjBlbmNvZGVkX2lucHV0cy5waXhlbF92YWx1ZXMlMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMndvcmRzJTIyJTVEJTIwJTNEJTIwZW5jb2RlZF9pbnB1dHMud29yZHMlMEElMjAlMjAlMjAlMjBleGFtcGxlcyU1QiUyMmJveGVzJTIyJTVEJTIwJTNEJTIwZW5jb2RlZF9pbnB1dHMuYm94ZXMlMEElMEElMjAlMjAlMjAlMjByZXR1cm4lMjBleGFtcGxlcw==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = processor.image_processor
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">get_ocr_words_and_boxes</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> images = [image.convert(<span class="hljs-string">&quot;RGB&quot;</span>) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> encoded_inputs = image_processor(images)
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;image&quot;</span>] = encoded_inputs.pixel_values
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;words&quot;</span>] = encoded_inputs.words
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;boxes&quot;</span>] = encoded_inputs.boxes
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples`,wrap:!1}}),is=new g({props:{code:"ZGF0YXNldF93aXRoX29jciUyMCUzRCUyMHVwZGF0ZWRfZGF0YXNldC5tYXAoZ2V0X29jcl93b3Jkc19hbmRfYm94ZXMlMkMlMjBiYXRjaGVkJTNEVHJ1ZSUyQyUyMGJhdGNoX3NpemUlM0QyKQ==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>dataset_with_ocr = updated_dataset.<span class="hljs-built_in">map</span>(get_ocr_words_and_boxes, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>)',wrap:!1}}),cs=new C({props:{title:"텍스트 데이터 전처리",local:"preprocessing-text-data",headingTag:"h3"}}),rs=new g({props:{code:"dG9rZW5pemVyJTIwJTNEJTIwcHJvY2Vzc29yLnRva2VuaXplcg==",highlighted:'<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = processor.tokenizer',wrap:!1}}),xs=new g({props:{code:"ZGVmJTIwc3ViZmluZGVyKHdvcmRzX2xpc3QlMkMlMjBhbnN3ZXJfbGlzdCklM0ElMEElMjAlMjAlMjAlMjBtYXRjaGVzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwc3RhcnRfaW5kaWNlcyUyMCUzRCUyMCU1QiU1RCUwQSUyMCUyMCUyMCUyMGVuZF9pbmRpY2VzJTIwJTNEJTIwJTVCJTVEJTBBJTIwJTIwJTIwJTIwZm9yJTIwaWR4JTJDJTIwaSUyMGluJTIwZW51bWVyYXRlKHJhbmdlKGxlbih3b3Jkc19saXN0KSkpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwaWYlMjB3b3Jkc19saXN0JTVCaSU1RCUyMCUzRCUzRCUyMGFuc3dlcl9saXN0JTVCMCU1RCUyMGFuZCUyMHdvcmRzX2xpc3QlNUJpJTIwJTNBJTIwaSUyMCUyQiUyMGxlbihhbnN3ZXJfbGlzdCklNUQlMjAlM0QlM0QlMjBhbnN3ZXJfbGlzdCUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMG1hdGNoZXMuYXBwZW5kKGFuc3dlcl9saXN0KSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0X2luZGljZXMuYXBwZW5kKGlkeCklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBlbmRfaW5kaWNlcy5hcHBlbmQoaWR4JTIwJTJCJTIwbGVuKGFuc3dlcl9saXN0KSUyMC0lMjAxKSUwQSUyMCUyMCUyMCUyMGlmJTIwbWF0Y2hlcyUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHJldHVybiUyMG1hdGNoZXMlNUIwJTVEJTJDJTIwc3RhcnRfaW5kaWNlcyU1QjAlNUQlMkMlMjBlbmRfaW5kaWNlcyU1QjAlNUQlMEElMjAlMjAlMjAlMjBlbHNlJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwcmV0dXJuJTIwTm9uZSUyQyUyMDAlMkMlMjAw",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; 
</span><span class="hljs-keyword">def</span> <span class="hljs-title function_">subfinder</span>(<span class="hljs-params">words_list, answer_list</span>):
<span class="hljs-meta">... </span> matches = []
<span class="hljs-meta">... </span> start_indices = []
<span class="hljs-meta">... </span> end_indices = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> idx, i <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(<span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(words_list))):
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> words_list[i] == answer_list[<span class="hljs-number">0</span>] <span class="hljs-keyword">and</span> words_list[i : i + <span class="hljs-built_in">len</span>(answer_list)] == answer_list:
<span class="hljs-meta">... </span> matches.append(answer_list)
<span class="hljs-meta">... </span> start_indices.append(idx)
<span class="hljs-meta">... </span> end_indices.append(idx + <span class="hljs-built_in">len</span>(answer_list) - <span class="hljs-number">1</span>)
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> matches:
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> matches[<span class="hljs-number">0</span>], start_indices[<span class="hljs-number">0</span>], end_indices[<span class="hljs-number">0</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> <span class="hljs-literal">None</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span>`,wrap:!1}}),Us=new g({props:{code:"ZXhhbXBsZSUyMCUzRCUyMGRhdGFzZXRfd2l0aF9vY3IlNUIlMjJ0cmFpbiUyMiU1RCU1QjElNUQlMEF3b3JkcyUyMCUzRCUyMCU1QndvcmQubG93ZXIoKSUyMGZvciUyMHdvcmQlMjBpbiUyMGV4YW1wbGUlNUIlMjJ3b3JkcyUyMiU1RCU1RCUwQW1hdGNoJTJDJTIwd29yZF9pZHhfc3RhcnQlMkMlMjB3b3JkX2lkeF9lbmQlMjAlM0QlMjBzdWJmaW5kZXIod29yZHMlMkMlMjBleGFtcGxlJTVCJTIyYW5zd2VyJTIyJTVELmxvd2VyKCkuc3BsaXQoKSklMEFwcmludCglMjJRdWVzdGlvbiUzQSUyMCUyMiUyQyUyMGV4YW1wbGUlNUIlMjJxdWVzdGlvbiUyMiU1RCklMEFwcmludCglMjJXb3JkcyUzQSUyMiUyQyUyMHdvcmRzKSUwQXByaW50KCUyMkFuc3dlciUzQSUyMCUyMiUyQyUyMGV4YW1wbGUlNUIlMjJhbnN3ZXIlMjIlNUQpJTBBcHJpbnQoJTIyc3RhcnRfaW5kZXglMjIlMkMlMjB3b3JkX2lkeF9zdGFydCklMEFwcmludCglMjJlbmRfaW5kZXglMjIlMkMlMjB3b3JkX2lkeF9lbmQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>example = dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">1</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>words = [word.lower() <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> example[<span class="hljs-string">&quot;words&quot;</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">match</span>, word_idx_start, word_idx_end = subfinder(words, example[<span class="hljs-string">&quot;answer&quot;</span>].lower().split())
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Question: &quot;</span>, example[<span class="hljs-string">&quot;question&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Words:&quot;</span>, words)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Answer: &quot;</span>, example[<span class="hljs-string">&quot;answer&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;start_index&quot;</span>, word_idx_start)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;end_index&quot;</span>, word_idx_end)
Question: Who <span class="hljs-keyword">is</span> <span class="hljs-keyword">in</span> cc <span class="hljs-keyword">in</span> this letter?
Words: [<span class="hljs-string">&#x27;wie&#x27;</span>, <span class="hljs-string">&#x27;baw&#x27;</span>, <span class="hljs-string">&#x27;brown&#x27;</span>, <span class="hljs-string">&#x27;&amp;&#x27;</span>, <span class="hljs-string">&#x27;williamson&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;corporation&#x27;</span>, <span class="hljs-string">&#x27;research&#x27;</span>, <span class="hljs-string">&#x27;&amp;&#x27;</span>, <span class="hljs-string">&#x27;development&#x27;</span>, <span class="hljs-string">&#x27;internal&#x27;</span>, <span class="hljs-string">&#x27;correspondence&#x27;</span>, <span class="hljs-string">&#x27;to:&#x27;</span>, <span class="hljs-string">&#x27;r.&#x27;</span>, <span class="hljs-string">&#x27;h.&#x27;</span>, <span class="hljs-string">&#x27;honeycutt&#x27;</span>, <span class="hljs-string">&#x27;ce:&#x27;</span>, <span class="hljs-string">&#x27;t.f.&#x27;</span>, <span class="hljs-string">&#x27;riehl&#x27;</span>, <span class="hljs-string">&#x27;from:&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;c.j.&#x27;</span>, <span class="hljs-string">&#x27;cook&#x27;</span>, <span class="hljs-string">&#x27;date:&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;8,&#x27;</span>, <span class="hljs-string">&#x27;1995&#x27;</span>, <span class="hljs-string">&#x27;subject:&#x27;</span>, <span class="hljs-string">&#x27;review&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;existing&#x27;</span>, <span class="hljs-string">&#x27;brainstorming&#x27;</span>, <span class="hljs-string">&#x27;ideas/483&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;major&#x27;</span>, <span class="hljs-string">&#x27;function&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span 
class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;product&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;graup&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;develop&#x27;</span>, <span class="hljs-string">&#x27;marketable&#x27;</span>, <span class="hljs-string">&#x27;nove!&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;would&#x27;</span>, <span class="hljs-string">&#x27;be&#x27;</span>, <span class="hljs-string">&#x27;profitable&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;manufacture&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;sell.&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;defined&#x27;</span>, <span class="hljs-string">&#x27;as:&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;kind,&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;anything&#x27;</span>, <span class="hljs-string">&#x27;seen&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;known&#x27;</span>, <span class="hljs-string">&#x27;before.&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;defined&#x27;</span>, <span class="hljs-string">&#x27;as:&#x27;</span>, <span 
class="hljs-string">&#x27;something&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;introduced;&#x27;</span>, <span class="hljs-string">&#x27;act&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;innovating;&#x27;</span>, <span class="hljs-string">&#x27;introduction&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;things&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;methods.&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;incorporate&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;latest&#x27;</span>, <span class="hljs-string">&#x27;technologies,&#x27;</span>, <span class="hljs-string">&#x27;materials&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;know-how&#x27;</span>, <span class="hljs-string">&#x27;available&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;give&#x27;</span>, <span class="hljs-string">&#x27;then&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;unique&#x27;</span>, <span class="hljs-string">&#x27;taste&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;look.&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;first&#x27;</span>, <span class="hljs-string">&#x27;task&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span 
class="hljs-string">&#x27;product&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;group&#x27;</span>, <span class="hljs-string">&#x27;was&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;assemble,&#x27;</span>, <span class="hljs-string">&#x27;review&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;categorize&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;list&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;existing&#x27;</span>, <span class="hljs-string">&#x27;brainstorming&#x27;</span>, <span class="hljs-string">&#x27;ideas.&#x27;</span>, <span class="hljs-string">&#x27;ideas&#x27;</span>, <span class="hljs-string">&#x27;were&#x27;</span>, <span class="hljs-string">&#x27;grouped&#x27;</span>, <span class="hljs-string">&#x27;into&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;major&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;labeled&#x27;</span>, <span class="hljs-string">&#x27;appearance&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;taste/aroma.&#x27;</span>, <span class="hljs-string">&#x27;these&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;are&#x27;</span>, <span class="hljs-string">&#x27;used&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;differ&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, 
<span class="hljs-string">&#x27;visual&#x27;</span>, <span class="hljs-string">&#x27;and/or&#x27;</span>, <span class="hljs-string">&#x27;taste/aroma&#x27;</span>, <span class="hljs-string">&#x27;point&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;view&#x27;</span>, <span class="hljs-string">&#x27;compared&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;canventional&#x27;</span>, <span class="hljs-string">&#x27;cigarettes.&#x27;</span>, <span class="hljs-string">&#x27;other&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;include&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;combination&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;above,&#x27;</span>, <span class="hljs-string">&#x27;filters,&#x27;</span>, <span class="hljs-string">&#x27;packaging&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;brand&#x27;</span>, <span class="hljs-string">&#x27;extensions.&#x27;</span>, <span class="hljs-string">&#x27;appearance&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;category&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;used&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;constructions&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;yield&#x27;</span>, <span class="hljs-string">&#x27;visually&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span 
class="hljs-string">&#x27;with&#x27;</span>, <span class="hljs-string">&#x27;minimal&#x27;</span>, <span class="hljs-string">&#x27;changes&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;smoke&#x27;</span>, <span class="hljs-string">&#x27;chemistry&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;cigarettes&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;cne.&#x27;</span>, <span class="hljs-string">&#x27;emulti-plug&#x27;</span>, <span class="hljs-string">&#x27;te&#x27;</span>, <span class="hljs-string">&#x27;build&#x27;</span>, <span class="hljs-string">&#x27;yaur&#x27;</span>, <span class="hljs-string">&#x27;awn&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;eswitchable&#x27;</span>, <span class="hljs-string">&#x27;menthol&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;non&#x27;</span>, <span class="hljs-string">&#x27;menthol&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;*cigarettes&#x27;</span>, <span class="hljs-string">&#x27;with&#x27;</span>, <span class="hljs-string">&#x27;interspaced&#x27;</span>, <span class="hljs-string">&#x27;perforations&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;enable&#x27;</span>, <span class="hljs-string">&#x27;smoker&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;separate&#x27;</span>, <span class="hljs-string">&#x27;unburned&#x27;</span>, <span class="hljs-string">&#x27;section&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;future&#x27;</span>, <span class="hljs-string">&#x27;smoking.&#x27;</span>, <span class="hljs-string">&#x27;«short&#x27;</span>, <span 
class="hljs-string">&#x27;cigarette,&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;section&#x27;</span>, <span class="hljs-string">&#x27;30&#x27;</span>, <span class="hljs-string">&#x27;mm.&#x27;</span>, <span class="hljs-string">&#x27;«extremely&#x27;</span>, <span class="hljs-string">&#x27;fast&#x27;</span>, <span class="hljs-string">&#x27;buming&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;«novel&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;constructions&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;permit&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;significant&#x27;</span>, <span class="hljs-string">&#x27;reduction&#x27;</span>, <span class="hljs-string">&#x27;iretobacco&#x27;</span>, <span class="hljs-string">&#x27;weight&#x27;</span>, <span class="hljs-string">&#x27;while&#x27;</span>, <span class="hljs-string">&#x27;maintaining&#x27;</span>, <span class="hljs-string">&#x27;smoking&#x27;</span>, <span class="hljs-string">&#x27;mechanics&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;visual&#x27;</span>, <span class="hljs-string">&#x27;characteristics.&#x27;</span>, <span class="hljs-string">&#x27;higher&#x27;</span>, <span class="hljs-string">&#x27;basis&#x27;</span>, <span class="hljs-string">&#x27;weight&#x27;</span>, <span class="hljs-string">&#x27;paper:&#x27;</span>, <span class="hljs-string">&#x27;potential&#x27;</span>, <span class="hljs-string">&#x27;reduction&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;weight.&#x27;</span>, <span class="hljs-string">&#x27;«more&#x27;</span>, <span class="hljs-string">&#x27;rigid&#x27;</span>, <span 
class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;column;&#x27;</span>, <span class="hljs-string">&#x27;stiffing&#x27;</span>, <span class="hljs-string">&#x27;agent&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;tobacco;&#x27;</span>, <span class="hljs-string">&#x27;e.g.&#x27;</span>, <span class="hljs-string">&#x27;starch&#x27;</span>, <span class="hljs-string">&#x27;*colored&#x27;</span>, <span class="hljs-string">&#x27;tow&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;papers;&#x27;</span>, <span class="hljs-string">&#x27;seasonal&#x27;</span>, <span class="hljs-string">&#x27;promotions,&#x27;</span>, <span class="hljs-string">&#x27;e.g.&#x27;</span>, <span class="hljs-string">&#x27;pastel&#x27;</span>, <span class="hljs-string">&#x27;colored&#x27;</span>, <span class="hljs-string">&#x27;cigarettes&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;easter&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;an&#x27;</span>, <span class="hljs-string">&#x27;ebony&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;ivory&#x27;</span>, <span class="hljs-string">&#x27;brand&#x27;</span>, <span class="hljs-string">&#x27;containing&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;mixture&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;all&#x27;</span>, <span class="hljs-string">&#x27;black&#x27;</span>, <span class="hljs-string">&#x27;(black&#x27;</span>, <span class="hljs-string">&#x27;paper&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;tow)&#x27;</span>, <span 
class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;ail&#x27;</span>, <span class="hljs-string">&#x27;white&#x27;</span>, <span class="hljs-string">&#x27;cigarettes.&#x27;</span>, <span class="hljs-string">&#x27;499150498&#x27;</span>]
Answer: T.F. Riehl
start_index <span class="hljs-number">17</span>
end_index <span class="hljs-number">18</span>`,wrap:!1}}),ds=new g({props:{code:"ZW5jb2RpbmclMjAlM0QlMjB0b2tlbml6ZXIoZXhhbXBsZSU1QiUyMnF1ZXN0aW9uJTIyJTVEJTJDJTIwZXhhbXBsZSU1QiUyMndvcmRzJTIyJTVEJTJDJTIwZXhhbXBsZSU1QiUyMmJveGVzJTIyJTVEKSUwQXRva2VuaXplci5kZWNvZGUoZW5jb2RpbmclNUIlMjJpbnB1dF9pZHMlMjIlNUQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>encoding = tokenizer(example[<span class="hljs-string">&quot;question&quot;</span>], example[<span class="hljs-string">&quot;words&quot;</span>], example[<span class="hljs-string">&quot;boxes&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.decode(encoding[<span class="hljs-string">&quot;input_ids&quot;</span>])
[CLS] who <span class="hljs-keyword">is</span> <span class="hljs-keyword">in</span> cc <span class="hljs-keyword">in</span> this letter? [SEP] wie baw brown &amp; williamson tobacco corporation research &amp; development ...`,wrap:!1}}),Ts=new g({props:{code:"ZGVmJTIwZW5jb2RlX2RhdGFzZXQoZXhhbXBsZXMlMkMlMjBtYXhfbGVuZ3RoJTNENTEyKSUzQSUwQSUyMCUyMCUyMCUyMHF1ZXN0aW9ucyUyMCUzRCUyMGV4YW1wbGVzJTVCJTIycXVlc3Rpb24lMjIlNUQlMEElMjAlMjAlMjAlMjB3b3JkcyUyMCUzRCUyMGV4YW1wbGVzJTVCJTIyd29yZHMlMjIlNUQlMEElMjAlMjAlMjAlMjBib3hlcyUyMCUzRCUyMGV4YW1wbGVzJTVCJTIyYm94ZXMlMjIlNUQlMEElMjAlMjAlMjAlMjBhbnN3ZXJzJTIwJTNEJTIwZXhhbXBsZXMlNUIlMjJhbnN3ZXIlMjIlNUQlMEElMEElMjAlMjAlMjAlMjAlMjMlMjAlRUMlOTglODglRUMlQTAlOUMlMjAlRUIlQjAlQjAlRUMlQjklOTglRUIlQTUlQkMlMjAlRUMlOUQlQjglRUMlQkQlOTQlRUIlOTQlQTklRUQlOTUlOTglRUElQjMlQTAlMjBzdGFydF9wb3NpdGlvbnMlRUMlOTklODAlMjBlbmRfcG9zaXRpb25zJUVCJUE1JUJDJTIwJUVDJUI0JTg4JUVBJUI4JUIwJUVEJTk5JTk0JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0JTBBJTIwJTIwJTIwJTIwZW5jb2RpbmclMjAlM0QlMjB0b2tlbml6ZXIocXVlc3Rpb25zJTJDJTIwd29yZHMlMkMlMjBib3hlcyUyQyUyMG1heF9sZW5ndGglM0RtYXhfbGVuZ3RoJTJDJTIwcGFkZGluZyUzRCUyMm1heF9sZW5ndGglMjIlMkMlMjB0cnVuY2F0aW9uJTNEVHJ1ZSklMEElMjAlMjAlMjAlMjBzdGFydF9wb3NpdGlvbnMlMjAlM0QlMjAlNUIlNUQlMEElMjAlMjAlMjAlMjBlbmRfcG9zaXRpb25zJTIwJTNEJTIwJTVCJTVEJTBBJTBBJTIwJTIwJTIwJTIwJTIzJTIwJUVCJUIwJUIwJUVDJUI5JTk4JUVDJTlEJTk4JTIwJUVDJTk4JTg4JUVDJUEwJTlDJUVCJUE1JUJDJTIwJUVCJUIwJTk4JUVCJUIzJUI1JUVEJTk1JUE5JUVCJThCJTg4JUVCJThCJUE0JTBBJTIwJTIwJTIwJTIwZm9yJTIwaSUyMGluJTIwcmFuZ2UobGVuKHF1ZXN0aW9ucykpJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwY2xzX2luZGV4JTIwJTNEJTIwZW5jb2RpbmclNUIlMjJpbnB1dF9pZHMlMjIlNUQlNUJpJTVELmluZGV4KHRva2VuaXplci5jbHNfdG9rZW5faWQpJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIzJTIwJUVDJTk4JTg4JUVDJUEwJTlDJUVDJTlEJTk4JTIwd29yZHMlRUMlOTclOTAlRUMlODQlOUMlMjAlRUIlOEIlQjUlRUIlQjMlODAlRUMlOUQlOTglMjAlRUMlOUMlODQlRUMlQjklOTglRUIlQTUlQkMlMjAlRUMlQjAlQkUlRUMlOEElQjUlRUIlOEIlODglRUIlOEIlQTQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3b3Jkc19leGFtcGxlJTIwJTNEJTIwJTVCd29yZC5sb3dlcigpJTIw
Zm9yJTIwd29yZCUyMGluJTIwd29yZHMlNUJpJTVEJTVEJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwYW5zd2VyJTIwJTNEJTIwYW5zd2VycyU1QmklNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBtYXRjaCUyQyUyMHdvcmRfaWR4X3N0YXJ0JTJDJTIwd29yZF9pZHhfZW5kJTIwJTNEJTIwc3ViZmluZGVyKHdvcmRzX2V4YW1wbGUlMkMlMjBhbnN3ZXIubG93ZXIoKS5zcGxpdCgpKSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGlmJTIwbWF0Y2glM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjMlMjAlRUMlOUQlQkMlRUMlQjklOTglRUQlOTUlOTglRUIlOEElOTQlMjAlRUQlOTUlQUQlRUIlQUElQTklRUMlOUQlODQlMjAlRUIlQjAlOUMlRUElQjIlQUMlRUQlOTUlOTglRUIlQTklQjQlMkMlMjAlNjB0b2tlbl90eXBlX2lkcyU2MCVFQiVBNSVCQyUyMCVFQyU4MiVBQyVFQyU5QSVBOSVFRCU5NSVCNCUyMCVFQyU5RCVCOCVFQyVCRCU5NCVFQiU5NCVBOSVFQyU5NyU5MCVFQyU4NCU5QyUyMCVFQiU4QiVBOCVFQyU5NiVCNCVFQSVCMCU4MCUyMCVFQyU4QiU5QyVFQyU5RSU5MSVFRCU5NSU5OCVFQiU4QSU5NCUyMCVFQyU5QyU4NCVFQyVCOSU5OCVFQiVBNSVCQyUyMCVFQyVCMCVCRSVFQyU4QSVCNSVFQiU4QiU4OCVFQiU4QiVBNCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRva2VuX3R5cGVfaWRzJTIwJTNEJTIwZW5jb2RpbmclNUIlMjJ0b2tlbl90eXBlX2lkcyUyMiU1RCU1QmklNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b2tlbl9zdGFydF9pbmRleCUyMCUzRCUyMDAlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3aGlsZSUyMHRva2VuX3R5cGVfaWRzJTVCdG9rZW5fc3RhcnRfaW5kZXglNUQlMjAhJTNEJTIwMSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRva2VuX3N0YXJ0X2luZGV4JTIwJTJCJTNEJTIwMSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRva2VuX2VuZF9pbmRleCUyMCUzRCUyMGxlbihlbmNvZGluZyU1QiUyMmlucHV0X2lkcyUyMiU1RCU1QmklNUQpJTIwLSUyMDElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB3aGlsZSUyMHRva2VuX3R5cGVfaWRzJTVCdG9rZW5fZW5kX2luZGV4JTVEJTIwISUzRCUyMDElM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjB0b2tlbl9lbmRfaW5kZXglMjAtJTNEJTIwMSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHdvcmRfaWRzJTIwJTNEJTIwZW5jb2Rpbmcud29yZF9pZHMoaSklNUJ0b2tlbl9zdGFydF9pbmRleCUyMCUzQSUyMHRva2VuX2VuZF9pbmRleCUyMCUyQiUyMDElNUQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGFydF9wb3NpdGlvbiUyMCUzRCUyMGNs
c19pbmRleCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGVuZF9wb3NpdGlvbiUyMCUzRCUyMGNsc19pbmRleCUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMHdvcmRzJUVDJTlEJTk4JTIwJUVCJThCJUI1JUVCJUIzJTgwJTIwJUVDJTlDJTg0JUVDJUI5JTk4JUVDJTk5JTgwJTIwJUVDJTlEJUJDJUVDJUI5JTk4JUVEJTk1JUEwJTIwJUVCJTk1JThDJUVBJUI5JThDJUVDJUE3JTgwJTIwd29yZF9pZHMlRUIlQTUlQkMlMjAlRUIlQjAlOTglRUIlQjMlQjUlRUQlOTUlOTglRUElQjMlQTAlMjAlNjB0b2tlbl9zdGFydF9pbmRleCU2MCVFQiVBNSVCQyUyMCVFQiU4QSU5OCVFQiVBNiVCRCVFQiU4QiU4OCVFQiU4QiVBNCUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMCVFQyU5RCVCQyVFQyVCOSU5OCVFRCU5NSU5OCVFQiVBOSVCNCUyMCU2MHRva2VuX3N0YXJ0X2luZGV4JTYwJUVCJUE1JUJDJTIwJUVDJTlEJUI4JUVDJUJEJTk0JUVCJTk0JUE5JUVDJTk3JTkwJUVDJTg0JTlDJTIwJUVCJThCJUI1JUVCJUIzJTgwJUVDJTlEJTk4JTIwJTYwc3RhcnRfcG9zaXRpb24lNjAlRUMlOUMlQkMlRUIlQTElOUMlMjAlRUMlQTAlODAlRUMlOUUlQTUlRUQlOTUlQTklRUIlOEIlODglRUIlOEIlQTQlMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBmb3IlMjBpZCUyMGluJTIwd29yZF9pZHMlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBpZiUyMGlkJTIwJTNEJTNEJTIwd29yZF9pZHhfc3RhcnQlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBzdGFydF9wb3NpdGlvbiUyMCUzRCUyMHRva2VuX3N0YXJ0X2luZGV4JTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZWxzZSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRva2VuX3N0YXJ0X2luZGV4JTIwJTJCJTNEJTIwMSUwQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMyUyMCVFQiVCOSU4NCVFQyU4QSVCNyVFRCU5NSU5OCVFQSVCMiU4QyUyQyUyMCVFQiU4MSU5RCVFQyU5NyU5MCVFQyU4NCU5QyUyMCVFQyU4QiU5QyVFQyU5RSU5MSVFRCU5NSVCNCUyMCU2MHdvcmRfaWRzJTYwJUVCJUE1JUJDJTIwJUVCJUIwJTk4JUVCJUIzJUI1JUVEJTk1JTk4JUVCJUE5JUIwJTIwJUVCJThCJUI1JUVCJUIzJTgwJUVDJTlEJTk4JTIwJTYwZW5kX3Bvc2l0aW9uJTYwJUVDJTlEJTg0JTIwJUVDJUIwJUJFJUVDJThBJUI1JUVCJThCJTg4JUVCJThCJUE0JTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZm9yJTIwaWQlMjBpbiUyMHdvcmRfaWRzJTVCJTNBJTNBLTElNUQlM0ElMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAl
MjAlMjAlMjAlMjAlMjBpZiUyMGlkJTIwJTNEJTNEJTIwd29yZF9pZHhfZW5kJTNBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZW5kX3Bvc2l0aW9uJTIwJTNEJTIwdG9rZW5fZW5kX2luZGV4JTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZWxzZSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHRva2VuX2VuZF9pbmRleCUyMC0lM0QlMjAxJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwc3RhcnRfcG9zaXRpb25zLmFwcGVuZChzdGFydF9wb3NpdGlvbiklMEElMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjAlMjBlbmRfcG9zaXRpb25zLmFwcGVuZChlbmRfcG9zaXRpb24pJTBBJTBBJTIwJTIwJTIwJTIwJTIwJTIwJTIwJTIwZWxzZSUzQSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMHN0YXJ0X3Bvc2l0aW9ucy5hcHBlbmQoY2xzX2luZGV4KSUwQSUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMCUyMGVuZF9wb3NpdGlvbnMuYXBwZW5kKGNsc19pbmRleCklMEElMEElMjAlMjAlMjAlMjBlbmNvZGluZyU1QiUyMmltYWdlJTIyJTVEJTIwJTNEJTIwZXhhbXBsZXMlNUIlMjJpbWFnZSUyMiU1RCUwQSUyMCUyMCUyMCUyMGVuY29kaW5nJTVCJTIyc3RhcnRfcG9zaXRpb25zJTIyJTVEJTIwJTNEJTIwc3RhcnRfcG9zaXRpb25zJTBBJTIwJTIwJTIwJTIwZW5jb2RpbmclNUIlMjJlbmRfcG9zaXRpb25zJTIyJTVEJTIwJTNEJTIwZW5kX3Bvc2l0aW9ucyUwQSUwQSUyMCUyMCUyMCUyMHJldHVybiUyMGVuY29kaW5n",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">encode_dataset</span>(<span class="hljs-params">examples, max_length=<span class="hljs-number">512</span></span>):
<span class="hljs-meta">... </span> questions = examples[<span class="hljs-string">&quot;question&quot;</span>]
<span class="hljs-meta">... </span> words = examples[<span class="hljs-string">&quot;words&quot;</span>]
<span class="hljs-meta">... </span> boxes = examples[<span class="hljs-string">&quot;boxes&quot;</span>]
<span class="hljs-meta">... </span> answers = examples[<span class="hljs-string">&quot;answer&quot;</span>]
<span class="hljs-meta">... </span> <span class="hljs-comment"># 예제 배치를 인코딩하고 start_positions와 end_positions를 초기화합니다</span>
<span class="hljs-meta">... </span> encoding = tokenizer(questions, words, boxes, max_length=max_length, padding=<span class="hljs-string">&quot;max_length&quot;</span>, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-meta">... </span> start_positions = []
<span class="hljs-meta">... </span> end_positions = []
<span class="hljs-meta">... </span> <span class="hljs-comment"># 배치의 예제를 반복합니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(questions)):
<span class="hljs-meta">... </span> cls_index = encoding[<span class="hljs-string">&quot;input_ids&quot;</span>][i].index(tokenizer.cls_token_id)
<span class="hljs-meta">... </span> <span class="hljs-comment"># 예제의 words에서 답변의 위치를 찾습니다</span>
<span class="hljs-meta">... </span> words_example = [word.lower() <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> words[i]]
<span class="hljs-meta">... </span> answer = answers[i]
<span class="hljs-meta">... </span> <span class="hljs-keyword">match</span>, word_idx_start, word_idx_end = subfinder(words_example, answer.lower().split())
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">match</span>:
<span class="hljs-meta">... </span> <span class="hljs-comment"># 일치하는 항목을 발견하면, \`token_type_ids\`를 사용해 인코딩에서 단어가 시작하는 위치를 찾습니다</span>
<span class="hljs-meta">... </span> token_type_ids = encoding[<span class="hljs-string">&quot;token_type_ids&quot;</span>][i]
<span class="hljs-meta">... </span> token_start_index = <span class="hljs-number">0</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">while</span> token_type_ids[token_start_index] != <span class="hljs-number">1</span>:
<span class="hljs-meta">... </span> token_start_index += <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> token_end_index = <span class="hljs-built_in">len</span>(encoding[<span class="hljs-string">&quot;input_ids&quot;</span>][i]) - <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">while</span> token_type_ids[token_end_index] != <span class="hljs-number">1</span>:
<span class="hljs-meta">... </span> token_end_index -= <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> word_ids = encoding.word_ids(i)[token_start_index : token_end_index + <span class="hljs-number">1</span>]
<span class="hljs-meta">... </span> start_position = cls_index
<span class="hljs-meta">... </span> end_position = cls_index
<span class="hljs-meta">... </span> <span class="hljs-comment"># words의 답변 위치와 일치할 때까지 word_ids를 반복하고 \`token_start_index\`를 늘립니다</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># 일치하면 \`token_start_index\`를 인코딩에서 답변의 \`start_position\`으로 저장합니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> <span class="hljs-built_in">id</span> <span class="hljs-keyword">in</span> word_ids:
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-built_in">id</span> == word_idx_start:
<span class="hljs-meta">... </span> start_position = token_start_index
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> token_start_index += <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># 비슷하게, 끝에서 시작해 \`word_ids\`를 반복하며 답변의 \`end_position\`을 찾습니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> <span class="hljs-built_in">id</span> <span class="hljs-keyword">in</span> word_ids[::-<span class="hljs-number">1</span>]:
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-built_in">id</span> == word_idx_end:
<span class="hljs-meta">... </span> end_position = token_end_index
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> token_end_index -= <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> start_positions.append(start_position)
<span class="hljs-meta">... </span> end_positions.append(end_position)
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> start_positions.append(cls_index)
<span class="hljs-meta">... </span> end_positions.append(cls_index)
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;image&quot;</span>] = examples[<span class="hljs-string">&quot;image&quot;</span>]
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;start_positions&quot;</span>] = start_positions
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;end_positions&quot;</span>] = end_positions
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> encoding`,wrap:!1}}),Cs=new g({props:{code:"ZW5jb2RlZF90cmFpbl9kYXRhc2V0JTIwJTNEJTIwZGF0YXNldF93aXRoX29jciU1QiUyMnRyYWluJTIyJTVELm1hcCglMEElMjAlMjAlMjAlMjBlbmNvZGVfZGF0YXNldCUyQyUyMGJhdGNoZWQlM0RUcnVlJTJDJTIwYmF0Y2hfc2l6ZSUzRDIlMkMlMjByZW1vdmVfY29sdW1ucyUzRGRhdGFzZXRfd2l0aF9vY3IlNUIlMjJ0cmFpbiUyMiU1RC5jb2x1bW5fbmFtZXMlMEEpJTBBZW5jb2RlZF90ZXN0X2RhdGFzZXQlMjAlM0QlMjBkYXRhc2V0X3dpdGhfb2NyJTVCJTIydGVzdCUyMiU1RC5tYXAoJTBBJTIwJTIwJTIwJTIwZW5jb2RlX2RhdGFzZXQlMkMlMjBiYXRjaGVkJTNEVHJ1ZSUyQyUyMGJhdGNoX3NpemUlM0QyJTJDJTIwcmVtb3ZlX2NvbHVtbnMlM0RkYXRhc2V0X3dpdGhfb2NyJTVCJTIydGVzdCUyMiU1RC5jb2x1bW5fbmFtZXMlMEEp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_train_dataset = dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>].<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> encode_dataset, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>, remove_columns=dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>].column_names
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_test_dataset = dataset_with_ocr[<span class="hljs-string">&quot;test&quot;</span>].<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> encode_dataset, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>, remove_columns=dataset_with_ocr[<span class="hljs-string">&quot;test&quot;</span>].column_names
<span class="hljs-meta">... </span>)`,wrap:!1}}),fs=new g({props:{code:"ZW5jb2RlZF90cmFpbl9kYXRhc2V0LmZlYXR1cmVz",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_train_dataset.features
{<span class="hljs-string">&#x27;image&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;uint8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;input_ids&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int32&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;bbox&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;start_positions&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;end_positions&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>)}`,wrap:!1}}),Is=new C({props:{title:"평가",local:"evaluation",headingTag:"h2"}}),Vs=new C({props:{title:"훈련",local:"train",headingTag:"h2"}}),Zs=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMEF1dG9Nb2RlbEZvckRvY3VtZW50UXVlc3Rpb25BbnN3ZXJpbmclMEElMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckRvY3VtZW50UXVlc3Rpb25BbnN3ZXJpbmcuZnJvbV9wcmV0cmFpbmVkKG1vZGVsX2NoZWNrcG9pbnQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForDocumentQuestionAnswering
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_checkpoint)`,wrap:!1}}),As=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRyYWluaW5nQXJndW1lbnRzJTBBJTBBJTIzJTIwJUVCJUIzJUI4JUVDJTlEJUI4JUVDJTlEJTk4JTIwJUVCJUEwJTg4JUVEJThGJUFDJUVDJUE3JTgwJUVEJTg2JUEwJUVCJUE2JUFDJTIwSUQlRUIlQTElOUMlMjAlRUIlQjAlOTQlRUElQkUlQjglRUMlODQlQjglRUMlOUElOTQlMEFyZXBvX2lkJTIwJTNEJTIwJTIyTWFyaWFLJTJGbGF5b3V0bG12Mi1iYXNlLXVuY2FzZWRfZmluZXR1bmVkX2RvY3ZxYSUyMiUwQSUwQXRyYWluaW5nX2FyZ3MlMjAlM0QlMjBUcmFpbmluZ0FyZ3VtZW50cyglMEElMjAlMjAlMjAlMjBvdXRwdXRfZGlyJTNEcmVwb19pZCUyQyUwQSUyMCUyMCUyMCUyMHBlcl9kZXZpY2VfdHJhaW5fYmF0Y2hfc2l6ZSUzRDQlMkMlMEElMjAlMjAlMjAlMjBudW1fdHJhaW5fZXBvY2hzJTNEMjAlMkMlMEElMjAlMjAlMjAlMjBzYXZlX3N0ZXBzJTNEMjAwJTJDJTBBJTIwJTIwJTIwJTIwbG9nZ2luZ19zdGVwcyUzRDUwJTJDJTBBJTIwJTIwJTIwJTIwZXZhbHVhdGlvbl9zdHJhdGVneSUzRCUyMnN0ZXBzJTIyJTJDJTBBJTIwJTIwJTIwJTIwbGVhcm5pbmdfcmF0ZSUzRDVlLTUlMkMlMEElMjAlMjAlMjAlMjBzYXZlX3RvdGFsX2xpbWl0JTNEMiUyQyUwQSUyMCUyMCUyMCUyMHJlbW92ZV91bnVzZWRfY29sdW1ucyUzREZhbHNlJTJDJTBBJTIwJTIwJTIwJTIwcHVzaF90b19odWIlM0RUcnVlJTJDJTBBKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TrainingArguments
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># 본인의 레포지토리 ID로 바꾸세요</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>repo_id = <span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>training_args = TrainingArguments(
<span class="hljs-meta">... </span> output_dir=repo_id,
<span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">4</span>,
<span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">20</span>,
<span class="hljs-meta">... </span> save_steps=<span class="hljs-number">200</span>,
<span class="hljs-meta">... </span> logging_steps=<span class="hljs-number">50</span>,
<span class="hljs-meta">... </span> evaluation_strategy=<span class="hljs-string">&quot;steps&quot;</span>,
<span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">5e-5</span>,
<span class="hljs-meta">... </span> save_total_limit=<span class="hljs-number">2</span>,
<span class="hljs-meta">... </span> remove_unused_columns=<span class="hljs-literal">False</span>,
<span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span>)`,wrap:!1}}),Xs=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMERlZmF1bHREYXRhQ29sbGF0b3IlMEElMEFkYXRhX2NvbGxhdG9yJTIwJTNEJTIwRGVmYXVsdERhdGFDb2xsYXRvcigp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DefaultDataCollator
<span class="hljs-meta">&gt;&gt;&gt; </span>data_collator = DefaultDataCollator()`,wrap:!1}}),Ns=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMFRyYWluZXIlMEElMEF0cmFpbmVyJTIwJTNEJTIwVHJhaW5lciglMEElMjAlMjAlMjAlMjBtb2RlbCUzRG1vZGVsJTJDJTBBJTIwJTIwJTIwJTIwYXJncyUzRHRyYWluaW5nX2FyZ3MlMkMlMEElMjAlMjAlMjAlMjBkYXRhX2NvbGxhdG9yJTNEZGF0YV9jb2xsYXRvciUyQyUwQSUyMCUyMCUyMCUyMHRyYWluX2RhdGFzZXQlM0RlbmNvZGVkX3RyYWluX2RhdGFzZXQlMkMlMEElMjAlMjAlMjAlMjBldmFsX2RhdGFzZXQlM0RlbmNvZGVkX3Rlc3RfZGF0YXNldCUyQyUwQSUyMCUyMCUyMCUyMHRva2VuaXplciUzRHByb2Nlc3NvciUyQyUwQSklMEElMEF0cmFpbmVyLnRyYWluKCk=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer = Trainer(
<span class="hljs-meta">... </span> model=model,
<span class="hljs-meta">... </span> args=training_args,
<span class="hljs-meta">... </span> data_collator=data_collator,
<span class="hljs-meta">... </span> train_dataset=encoded_train_dataset,
<span class="hljs-meta">... </span> eval_dataset=encoded_test_dataset,
<span class="hljs-meta">... </span> tokenizer=processor,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.train()`,wrap:!1}}),Es=new g({props:{code:"dHJhaW5lci5jcmVhdGVfbW9kZWxfY2FyZCgpJTBBdHJhaW5lci5wdXNoX3RvX2h1Yigp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.create_model_card()
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.push_to_hub()`,wrap:!1}}),Bs=new C({props:{title:"추론",local:"inference",headingTag:"h2"}}),zs=new g({props:{code:"ZXhhbXBsZSUyMCUzRCUyMGRhdGFzZXQlNUIlMjJ0ZXN0JTIyJTVEJTVCMiU1RCUwQXF1ZXN0aW9uJTIwJTNEJTIwZXhhbXBsZSU1QiUyMnF1ZXJ5JTIyJTVEJTVCJTIyZW4lMjIlNUQlMEFpbWFnZSUyMCUzRCUyMGV4YW1wbGUlNUIlMjJpbWFnZSUyMiU1RCUwQXByaW50KHF1ZXN0aW9uKSUwQXByaW50KGV4YW1wbGUlNUIlMjJhbnN3ZXJzJTIyJTVEKQ==",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span>example = dataset[<span class="hljs-string">&quot;test&quot;</span>][<span class="hljs-number">2</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>question = example[<span class="hljs-string">&quot;query&quot;</span>][<span class="hljs-string">&quot;en&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image = example[<span class="hljs-string">&quot;image&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(question)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(example[<span class="hljs-string">&quot;answers&quot;</span>])
<span class="hljs-string">&#x27;Who is ‘presiding’ TRRF GENERAL SESSION (PART 1)?&#x27;</span>
[<span class="hljs-string">&#x27;TRRF Vice President&#x27;</span>, <span class="hljs-string">&#x27;lee a. waller&#x27;</span>]`,wrap:!1}}),Ss=new g({props:{code:"ZnJvbSUyMHRyYW5zZm9ybWVycyUyMGltcG9ydCUyMHBpcGVsaW5lJTBBJTBBcWFfcGlwZWxpbmUlMjAlM0QlMjBwaXBlbGluZSglMjJkb2N1bWVudC1xdWVzdGlvbi1hbnN3ZXJpbmclMjIlMkMlMjBtb2RlbCUzRCUyMk1hcmlhSyUyRmxheW91dGxtdjItYmFzZS11bmNhc2VkX2ZpbmV0dW5lZF9kb2N2cWElMjIpJTBBcWFfcGlwZWxpbmUoaW1hZ2UlMkMlMjBxdWVzdGlvbik=",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>qa_pipeline = pipeline(<span class="hljs-string">&quot;document-question-answering&quot;</span>, model=<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>qa_pipeline(image, question)
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9949808120727539</span>,
<span class="hljs-string">&#x27;answer&#x27;</span>: <span class="hljs-string">&#x27;Lee A. Waller&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">55</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]`,wrap:!1}}),Ds=new g({props:{code:"aW1wb3J0JTIwdG9yY2glMEFmcm9tJTIwdHJhbnNmb3JtZXJzJTIwaW1wb3J0JTIwQXV0b1Byb2Nlc3NvciUwQWZyb20lMjB0cmFuc2Zvcm1lcnMlMjBpbXBvcnQlMjBBdXRvTW9kZWxGb3JEb2N1bWVudFF1ZXN0aW9uQW5zd2VyaW5nJTBBJTBBcHJvY2Vzc29yJTIwJTNEJTIwQXV0b1Byb2Nlc3Nvci5mcm9tX3ByZXRyYWluZWQoJTIyTWFyaWFLJTJGbGF5b3V0bG12Mi1iYXNlLXVuY2FzZWRfZmluZXR1bmVkX2RvY3ZxYSUyMiklMEFtb2RlbCUyMCUzRCUyMEF1dG9Nb2RlbEZvckRvY3VtZW50UXVlc3Rpb25BbnN3ZXJpbmcuZnJvbV9wcmV0cmFpbmVkKCUyMk1hcmlhSyUyRmxheW91dGxtdjItYmFzZS11bmNhc2VkX2ZpbmV0dW5lZF9kb2N2cWElMjIpJTBBJTBBd2l0aCUyMHRvcmNoLm5vX2dyYWQoKSUzQSUwQSUyMCUyMCUyMCUyMGVuY29kaW5nJTIwJTNEJTIwcHJvY2Vzc29yKGltYWdlLmNvbnZlcnQoJTIyUkdCJTIyKSUyQyUyMHF1ZXN0aW9uJTJDJTIwcmV0dXJuX3RlbnNvcnMlM0QlMjJwdCUyMiklMEElMjAlMjAlMjAlMjBvdXRwdXRzJTIwJTNEJTIwbW9kZWwoKiplbmNvZGluZyklMEElMjAlMjAlMjAlMjBzdGFydF9sb2dpdHMlMjAlM0QlMjBvdXRwdXRzLnN0YXJ0X2xvZ2l0cyUwQSUyMCUyMCUyMCUyMGVuZF9sb2dpdHMlMjAlM0QlMjBvdXRwdXRzLmVuZF9sb2dpdHMlMEElMjAlMjAlMjAlMjBwcmVkaWN0ZWRfc3RhcnRfaWR4JTIwJTNEJTIwc3RhcnRfbG9naXRzLmFyZ21heCgtMSkuaXRlbSgpJTBBJTIwJTIwJTIwJTIwcHJlZGljdGVkX2VuZF9pZHglMjAlM0QlMjBlbmRfbG9naXRzLmFyZ21heCgtMSkuaXRlbSgpJTBBJTBBcHJvY2Vzc29yLnRva2VuaXplci5kZWNvZGUoZW5jb2RpbmcuaW5wdXRfaWRzLnNxdWVlemUoKSU1QnByZWRpY3RlZF9zdGFydF9pZHglMjAlM0ElMjBwcmVkaWN0ZWRfZW5kX2lkeCUyMCUyQiUyMDElNUQp",highlighted:`<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForDocumentQuestionAnswering
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForDocumentQuestionAnswering.from_pretrained(<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> encoding = processor(image.convert(<span class="hljs-string">&quot;RGB&quot;</span>), question, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> outputs = model(**encoding)
<span class="hljs-meta">... </span> start_logits = outputs.start_logits
<span class="hljs-meta">... </span> end_logits = outputs.end_logits
<span class="hljs-meta">... </span> predicted_start_idx = start_logits.argmax(-<span class="hljs-number">1</span>).item()
<span class="hljs-meta">... </span> predicted_end_idx = end_logits.argmax(-<span class="hljs-number">1</span>).item()
<span class="hljs-meta">&gt;&gt;&gt; </span>processor.tokenizer.decode(encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + <span class="hljs-number">1</span>])
<span class="hljs-string">&#x27;lee a. waller&#x27;</span>`,wrap:!1}}),{c(){U=p("meta"),b=t(),y=p("p"),d=t(),j(w.$$.fragment),m=t(),j(J.$$.fragment),Ps=t(),f=p("p"),f.textContent=Ia,Ks=t(),I=p("p"),I.textContent=_a,sl=t(),_=p("ul"),_.innerHTML=Va,ll=t(),j(T.$$.fragment),al=t(),V=p("p"),V.textContent=Ra,nl=t(),R=p("p"),R.textContent=ka,tl=t(),j(k.$$.fragment),el=t(),j(Z.$$.fragment),pl=t(),j(Q.$$.fragment),il=t(),A=p("p"),A.textContent=Za,cl=t(),$=p("p"),$.textContent=Qa,jl=t(),j(X.$$.fragment),rl=t(),v=p("p"),v.textContent=Aa,Ml=t(),j(N.$$.fragment),hl=t(),j(G.$$.fragment),ol=t(),E=p("p"),E.innerHTML=$a,xl=t(),j(B.$$.fragment),gl=t(),F=p("p"),F.textContent=Xa,Ul=t(),j(W.$$.fragment),ml=t(),z=p("p"),z.textContent=va,dl=t(),q=p("ul"),q.innerHTML=Na,Jl=t(),S=p("p"),S.innerHTML=Ga,yl=t(),j(Y.$$.fragment),wl=t(),H=p("p"),H.innerHTML=Ea,Tl=t(),j(D.$$.fragment),ul=t(),L=p("p"),L.innerHTML=Ba,Cl=t(),j(O.$$.fragment),bl=t(),P=p("p"),P.textContent=Fa,fl=t(),j(K.$$.fragment),Il=t(),u=p("div"),u.innerHTML=Wa,_l=t(),j(ss.$$.fragment),Vl=t(),ls=p("p"),ls.innerHTML=za,Rl=t(),j(as.$$.fragment),kl=t(),j(ns.$$.fragment),Zl=t(),ts=p("p"),ts.innerHTML=qa,Ql=t(),j(es.$$.fragment),Al=t(),ps=p("p"),ps.innerHTML=Sa,$l=t(),j(is.$$.fragment),Xl=t(),j(cs.$$.fragment),vl=t(),js=p("p"),js.innerHTML=Ya,Nl=t(),j(rs.$$.fragment),Gl=t(),Ms=p("p"),Ms.innerHTML=Ha,El=t(),hs=p("p"),hs.textContent=Da,Bl=t(),os=p("p"),os.innerHTML=La,Fl=t(),j(xs.$$.fragment),Wl=t(),gs=p("p"),gs.textContent=Oa,zl=t(),j(Us.$$.fragment),ql=t(),ms=p("p"),ms.textContent=Pa,Sl=t(),j(ds.$$.fragment),Yl=t(),Js=p("p"),Js.textContent=Ka,Hl=t(),ys=p("ul"),ys.innerHTML=sn,Dl=t(),ws=p("p"),ws.textContent=ln,Ll=t(),j(Ts.$$.fragment),Ol=t(),us=p("p"),us.textContent=an,Pl=t(),j(Cs.$$.fragment),Kl=t(),bs=p("p"),bs.textContent=nn,sa=t(),j(fs.$$.fragment),la=t(),j(Is.$$.fragment),aa=t(),_s=p("p"),_s.innerHTML=tn,na=t(),j(Vs.$$.fragment),ta=t(),Rs=p("p"),Rs.textContent=en,ea=t(),ks=p("ul"),ks.innerHTML=pn,pa=t(),j(Zs.$$.fragment),ia=t(),Qs
=p("p"),Qs.innerHTML=cn,ca=t(),j(As.$$.fragment),ja=t(),$s=p("p"),$s.textContent=jn,ra=t(),j(Xs.$$.fragment),Ma=t(),vs=p("p"),vs.innerHTML=rn,ha=t(),j(Ns.$$.fragment),oa=t(),Gs=p("p"),Gs.innerHTML=Mn,xa=t(),j(Es.$$.fragment),ga=t(),j(Bs.$$.fragment),Ua=t(),Fs=p("p"),Fs.innerHTML=hn,ma=t(),Ws=p("p"),Ws.textContent=on,da=t(),j(zs.$$.fragment),Ja=t(),qs=p("p"),qs.textContent=xn,ya=t(),j(Ss.$$.fragment),wa=t(),Ys=p("p"),Ys.textContent=gn,Ta=t(),Hs=p("ol"),Hs.innerHTML=Un,ua=t(),j(Ds.$$.fragment),Ca=t(),Ls=p("p"),this.h()},l(s){const l=Cn("svelte-u9bgzb",document.head);U=i(l,"META",{name:!0,content:!0}),l.forEach(a),b=e(s),y=i(s,"P",{}),dn(y).forEach(a),d=e(s),r(w.$$.fragment,s),m=e(s),r(J.$$.fragment,s),Ps=e(s),f=i(s,"P",{"data-svelte-h":!0}),c(f)!=="svelte-oqnn9h"&&(f.textContent=Ia),Ks=e(s),I=i(s,"P",{"data-svelte-h":!0}),c(I)!=="svelte-k9bbb9"&&(I.textContent=_a),sl=e(s),_=i(s,"UL",{"data-svelte-h":!0}),c(_)!=="svelte-1t9y1pd"&&(_.innerHTML=Va),ll=e(s),r(T.$$.fragment,s),al=e(s),V=i(s,"P",{"data-svelte-h":!0}),c(V)!=="svelte-1t45j8o"&&(V.textContent=Ra),nl=e(s),R=i(s,"P",{"data-svelte-h":!0}),c(R)!=="svelte-ddzvb0"&&(R.textContent=ka),tl=e(s),r(k.$$.fragment,s),el=e(s),r(Z.$$.fragment,s),pl=e(s),r(Q.$$.fragment,s),il=e(s),A=i(s,"P",{"data-svelte-h":!0}),c(A)!=="svelte-1uvmu64"&&(A.textContent=Za),cl=e(s),$=i(s,"P",{"data-svelte-h":!0}),c($)!=="svelte-xyvcw8"&&($.textContent=Qa),jl=e(s),r(X.$$.fragment,s),rl=e(s),v=i(s,"P",{"data-svelte-h":!0}),c(v)!=="svelte-13bifrs"&&(v.textContent=Aa),Ml=e(s),r(N.$$.fragment,s),hl=e(s),r(G.$$.fragment,s),ol=e(s),E=i(s,"P",{"data-svelte-h":!0}),c(E)!=="svelte-12y9xdp"&&(E.innerHTML=$a),xl=e(s),r(B.$$.fragment,s),gl=e(s),F=i(s,"P",{"data-svelte-h":!0}),c(F)!=="svelte-9rg4tz"&&(F.textContent=Xa),Ul=e(s),r(W.$$.fragment,s),ml=e(s),z=i(s,"P",{"data-svelte-h":!0}),c(z)!=="svelte-zq0ej4"&&(z.textContent=va),dl=e(s),q=i(s,"UL",{"data-svelte-h":!0}),c(q)!=="svelte-g2ws24"&&(q.innerHTML=Na),Jl=e(s),S=i(s,"P",{"data-svelte-h":!0}),c(S)!=="sve
lte-xeapog"&&(S.innerHTML=Ga),yl=e(s),r(Y.$$.fragment,s),wl=e(s),H=i(s,"P",{"data-svelte-h":!0}),c(H)!=="svelte-m4zkx9"&&(H.innerHTML=Ea),Tl=e(s),r(D.$$.fragment,s),ul=e(s),L=i(s,"P",{"data-svelte-h":!0}),c(L)!=="svelte-1xnig6m"&&(L.innerHTML=Ba),Cl=e(s),r(O.$$.fragment,s),bl=e(s),P=i(s,"P",{"data-svelte-h":!0}),c(P)!=="svelte-lp4iy7"&&(P.textContent=Fa),fl=e(s),r(K.$$.fragment,s),Il=e(s),u=i(s,"DIV",{class:!0,"data-svelte-h":!0}),c(u)!=="svelte-q63tj1"&&(u.innerHTML=Wa),_l=e(s),r(ss.$$.fragment,s),Vl=e(s),ls=i(s,"P",{"data-svelte-h":!0}),c(ls)!=="svelte-1y93l9d"&&(ls.innerHTML=za),Rl=e(s),r(as.$$.fragment,s),kl=e(s),r(ns.$$.fragment,s),Zl=e(s),ts=i(s,"P",{"data-svelte-h":!0}),c(ts)!=="svelte-1kcsbqg"&&(ts.innerHTML=qa),Ql=e(s),r(es.$$.fragment,s),Al=e(s),ps=i(s,"P",{"data-svelte-h":!0}),c(ps)!=="svelte-1rksgl2"&&(ps.innerHTML=Sa),$l=e(s),r(is.$$.fragment,s),Xl=e(s),r(cs.$$.fragment,s),vl=e(s),js=i(s,"P",{"data-svelte-h":!0}),c(js)!=="svelte-1l26czp"&&(js.innerHTML=Ya),Nl=e(s),r(rs.$$.fragment,s),Gl=e(s),Ms=i(s,"P",{"data-svelte-h":!0}),c(Ms)!=="svelte-1yiqpne"&&(Ms.innerHTML=Ha),El=e(s),hs=i(s,"P",{"data-svelte-h":!0}),c(hs)!=="svelte-rkm9nf"&&(hs.textContent=Da),Bl=e(s),os=i(s,"P",{"data-svelte-h":!0}),c(os)!=="svelte-1pxhw5r"&&(os.innerHTML=La),Fl=e(s),r(xs.$$.fragment,s),Wl=e(s),gs=i(s,"P",{"data-svelte-h":!0}),c(gs)!=="svelte-mdy9jm"&&(gs.textContent=Oa),zl=e(s),r(Us.$$.fragment,s),ql=e(s),ms=i(s,"P",{"data-svelte-h":!0}),c(ms)!=="svelte-1gff4qz"&&(ms.textContent=Pa),Sl=e(s),r(ds.$$.fragment,s),Yl=e(s),Js=i(s,"P",{"data-svelte-h":!0}),c(Js)!=="svelte-zzn8kk"&&(Js.textContent=Ka),Hl=e(s),ys=i(s,"UL",{"data-svelte-h":!0}),c(ys)!=="svelte-f0s3pn"&&(ys.innerHTML=sn),Dl=e(s),ws=i(s,"P",{"data-svelte-h":!0}),c(ws)!=="svelte-1h1oq5v"&&(ws.textContent=ln),Ll=e(s),r(Ts.$$.fragment,s),Ol=e(s),us=i(s,"P",{"data-svelte-h":!0}),c(us)!=="svelte-10ovv0a"&&(us.textContent=an),Pl=e(s),r(Cs.$$.fragment,s),Kl=e(s),bs=i(s,"P",{"data-svelte-h":!0}),c(bs)!=="svelte-1cmvv07"&&(bs.tex
tContent=nn),sa=e(s),r(fs.$$.fragment,s),la=e(s),r(Is.$$.fragment,s),aa=e(s),_s=i(s,"P",{"data-svelte-h":!0}),c(_s)!=="svelte-8iuplh"&&(_s.innerHTML=tn),na=e(s),r(Vs.$$.fragment,s),ta=e(s),Rs=i(s,"P",{"data-svelte-h":!0}),c(Rs)!=="svelte-1r96dak"&&(Rs.textContent=en),ea=e(s),ks=i(s,"UL",{"data-svelte-h":!0}),c(ks)!=="svelte-dhr9ud"&&(ks.innerHTML=pn),pa=e(s),r(Zs.$$.fragment,s),ia=e(s),Qs=i(s,"P",{"data-svelte-h":!0}),c(Qs)!=="svelte-1wyv6ig"&&(Qs.innerHTML=cn),ca=e(s),r(As.$$.fragment,s),ja=e(s),$s=i(s,"P",{"data-svelte-h":!0}),c($s)!=="svelte-1hdarm6"&&($s.textContent=jn),ra=e(s),r(Xs.$$.fragment,s),Ma=e(s),vs=i(s,"P",{"data-svelte-h":!0}),c(vs)!=="svelte-1heolva"&&(vs.innerHTML=rn),ha=e(s),r(Ns.$$.fragment,s),oa=e(s),Gs=i(s,"P",{"data-svelte-h":!0}),c(Gs)!=="svelte-1gum9w7"&&(Gs.innerHTML=Mn),xa=e(s),r(Es.$$.fragment,s),ga=e(s),r(Bs.$$.fragment,s),Ua=e(s),Fs=i(s,"P",{"data-svelte-h":!0}),c(Fs)!=="svelte-1flysn8"&&(Fs.innerHTML=hn),ma=e(s),Ws=i(s,"P",{"data-svelte-h":!0}),c(Ws)!=="svelte-1dcjn75"&&(Ws.textContent=on),da=e(s),r(zs.$$.fragment,s),Ja=e(s),qs=i(s,"P",{"data-svelte-h":!0}),c(qs)!=="svelte-gaec9h"&&(qs.textContent=xn),ya=e(s),r(Ss.$$.fragment,s),wa=e(s),Ys=i(s,"P",{"data-svelte-h":!0}),c(Ys)!=="svelte-4epvs7"&&(Ys.textContent=gn),Ta=e(s),Hs=i(s,"OL",{"data-svelte-h":!0}),c(Hs)!=="svelte-10zqnq5"&&(Hs.innerHTML=Un),ua=e(s),r(Ds.$$.fragment,s),Ca=e(s),Ls=i(s,"P",{}),dn(Ls).forEach(a),this.h()},h(){fa(U,"name","hf:doc:metadata"),fa(U,"content",Rn),fa(u,"class","flex 
justify-center")},m(s,l){bn(document.head,U),n(s,b,l),n(s,y,l),n(s,d,l),M(w,s,l),n(s,m,l),M(J,s,l),n(s,Ps,l),n(s,f,l),n(s,Ks,l),n(s,I,l),n(s,sl,l),n(s,_,l),n(s,ll,l),M(T,s,l),n(s,al,l),n(s,V,l),n(s,nl,l),n(s,R,l),n(s,tl,l),M(k,s,l),n(s,el,l),M(Z,s,l),n(s,pl,l),M(Q,s,l),n(s,il,l),n(s,A,l),n(s,cl,l),n(s,$,l),n(s,jl,l),M(X,s,l),n(s,rl,l),n(s,v,l),n(s,Ml,l),M(N,s,l),n(s,hl,l),M(G,s,l),n(s,ol,l),n(s,E,l),n(s,xl,l),M(B,s,l),n(s,gl,l),n(s,F,l),n(s,Ul,l),M(W,s,l),n(s,ml,l),n(s,z,l),n(s,dl,l),n(s,q,l),n(s,Jl,l),n(s,S,l),n(s,yl,l),M(Y,s,l),n(s,wl,l),n(s,H,l),n(s,Tl,l),M(D,s,l),n(s,ul,l),n(s,L,l),n(s,Cl,l),M(O,s,l),n(s,bl,l),n(s,P,l),n(s,fl,l),M(K,s,l),n(s,Il,l),n(s,u,l),n(s,_l,l),M(ss,s,l),n(s,Vl,l),n(s,ls,l),n(s,Rl,l),M(as,s,l),n(s,kl,l),M(ns,s,l),n(s,Zl,l),n(s,ts,l),n(s,Ql,l),M(es,s,l),n(s,Al,l),n(s,ps,l),n(s,$l,l),M(is,s,l),n(s,Xl,l),M(cs,s,l),n(s,vl,l),n(s,js,l),n(s,Nl,l),M(rs,s,l),n(s,Gl,l),n(s,Ms,l),n(s,El,l),n(s,hs,l),n(s,Bl,l),n(s,os,l),n(s,Fl,l),M(xs,s,l),n(s,Wl,l),n(s,gs,l),n(s,zl,l),M(Us,s,l),n(s,ql,l),n(s,ms,l),n(s,Sl,l),M(ds,s,l),n(s,Yl,l),n(s,Js,l),n(s,Hl,l),n(s,ys,l),n(s,Dl,l),n(s,ws,l),n(s,Ll,l),M(Ts,s,l),n(s,Ol,l),n(s,us,l),n(s,Pl,l),M(Cs,s,l),n(s,Kl,l),n(s,bs,l),n(s,sa,l),M(fs,s,l),n(s,la,l),M(Is,s,l),n(s,aa,l),n(s,_s,l),n(s,na,l),M(Vs,s,l),n(s,ta,l),n(s,Rs,l),n(s,ea,l),n(s,ks,l),n(s,pa,l),M(Zs,s,l),n(s,ia,l),n(s,Qs,l),n(s,ca,l),M(As,s,l),n(s,ja,l),n(s,$s,l),n(s,ra,l),M(Xs,s,l),n(s,Ma,l),n(s,vs,l),n(s,ha,l),M(Ns,s,l),n(s,oa,l),n(s,Gs,l),n(s,xa,l),M(Es,s,l),n(s,ga,l),M(Bs,s,l),n(s,Ua,l),n(s,Fs,l),n(s,ma,l),n(s,Ws,l),n(s,da,l),M(zs,s,l),n(s,Ja,l),n(s,qs,l),n(s,ya,l),M(Ss,s,l),n(s,wa,l),n(s,Ys,l),n(s,Ta,l),n(s,Hs,l),n(s,ua,l),M(Ds,s,l),n(s,Ca,l),n(s,Ls,l),ba=!0},p(s,[l]){const 
mn={};l&2&&(mn.$$scope={dirty:l,ctx:s}),T.$set(mn)},i(s){ba||(h(w.$$.fragment,s),h(J.$$.fragment,s),h(T.$$.fragment,s),h(k.$$.fragment,s),h(Z.$$.fragment,s),h(Q.$$.fragment,s),h(X.$$.fragment,s),h(N.$$.fragment,s),h(G.$$.fragment,s),h(B.$$.fragment,s),h(W.$$.fragment,s),h(Y.$$.fragment,s),h(D.$$.fragment,s),h(O.$$.fragment,s),h(K.$$.fragment,s),h(ss.$$.fragment,s),h(as.$$.fragment,s),h(ns.$$.fragment,s),h(es.$$.fragment,s),h(is.$$.fragment,s),h(cs.$$.fragment,s),h(rs.$$.fragment,s),h(xs.$$.fragment,s),h(Us.$$.fragment,s),h(ds.$$.fragment,s),h(Ts.$$.fragment,s),h(Cs.$$.fragment,s),h(fs.$$.fragment,s),h(Is.$$.fragment,s),h(Vs.$$.fragment,s),h(Zs.$$.fragment,s),h(As.$$.fragment,s),h(Xs.$$.fragment,s),h(Ns.$$.fragment,s),h(Es.$$.fragment,s),h(Bs.$$.fragment,s),h(zs.$$.fragment,s),h(Ss.$$.fragment,s),h(Ds.$$.fragment,s),ba=!0)},o(s){o(w.$$.fragment,s),o(J.$$.fragment,s),o(T.$$.fragment,s),o(k.$$.fragment,s),o(Z.$$.fragment,s),o(Q.$$.fragment,s),o(X.$$.fragment,s),o(N.$$.fragment,s),o(G.$$.fragment,s),o(B.$$.fragment,s),o(W.$$.fragment,s),o(Y.$$.fragment,s),o(D.$$.fragment,s),o(O.$$.fragment,s),o(K.$$.fragment,s),o(ss.$$.fragment,s),o(as.$$.fragment,s),o(ns.$$.fragment,s),o(es.$$.fragment,s),o(is.$$.fragment,s),o(cs.$$.fragment,s),o(rs.$$.fragment,s),o(xs.$$.fragment,s),o(Us.$$.fragment,s),o(ds.$$.fragment,s),o(Ts.$$.fragment,s),o(Cs.$$.fragment,s),o(fs.$$.fragment,s),o(Is.$$.fragment,s),o(Vs.$$.fragment,s),o(Zs.$$.fragment,s),o(As.$$.fragment,s),o(Xs.$$.fragment,s),o(Ns.$$.fragment,s),o(Es.$$.fragment,s),o(Bs.$$.fragment,s),o(zs.$$.fragment,s),o(Ss.$$.fragment,s),o(Ds.$$.fragment,s),ba=!1},d(s){s&&(a(b),a(y),a(d),a(m),a(Ps),a(f),a(Ks),a(I),a(sl),a(_),a(ll),a(al),a(V),a(nl),a(R),a(tl),a(el),a(pl),a(il),a(A),a(cl),a($),a(jl),a(rl),a(v),a(Ml),a(hl),a(ol),a(E),a(xl),a(gl),a(F),a(Ul),a(ml),a(z),a(dl),a(q),a(Jl),a(S),a(yl),a(wl),a(H),a(Tl),a(ul),a(L),a(Cl),a(bl),a(P),a(fl),a(Il),a(u),a(_l),a(Vl),a(ls),a(Rl),a(kl),a(Zl),a(ts),a(Ql),a(Al),a(ps),a($l),a(Xl),a(vl),a(js),a(Nl),a(Gl
),a(Ms),a(El),a(hs),a(Bl),a(os),a(Fl),a(Wl),a(gs),a(zl),a(ql),a(ms),a(Sl),a(Yl),a(Js),a(Hl),a(ys),a(Dl),a(ws),a(Ll),a(Ol),a(us),a(Pl),a(Kl),a(bs),a(sa),a(la),a(aa),a(_s),a(na),a(ta),a(Rs),a(ea),a(ks),a(pa),a(ia),a(Qs),a(ca),a(ja),a($s),a(ra),a(Ma),a(vs),a(ha),a(oa),a(Gs),a(xa),a(ga),a(Ua),a(Fs),a(ma),a(Ws),a(da),a(Ja),a(qs),a(ya),a(wa),a(Ys),a(Ta),a(Hs),a(ua),a(Ca),a(Ls)),a(U),x(w,s),x(J,s),x(T,s),x(k,s),x(Z,s),x(Q,s),x(X,s),x(N,s),x(G,s),x(B,s),x(W,s),x(Y,s),x(D,s),x(O,s),x(K,s),x(ss,s),x(as,s),x(ns,s),x(es,s),x(is,s),x(cs,s),x(rs,s),x(xs,s),x(Us,s),x(ds,s),x(Ts,s),x(Cs,s),x(fs,s),x(Is,s),x(Vs,s),x(Zs,s),x(As,s),x(Xs,s),x(Ns,s),x(Es,s),x(Bs,s),x(zs,s),x(Ss,s),x(Ds,s)}}}
/**
 * Page metadata serialized as a JSON string: the document title, its local
 * anchor, and the nested section outline (title / local / sections / depth).
 * The fragment factory `Vn` passes it as the "content" value of the
 * <meta name="hf:doc:metadata"> element it creates in the document head.
 * The Korean titles are runtime data and must not be edited or translated.
 */
const Rn='{"title":"문서 질의 응답(Document Question Answering)","local":"document_question_answering","sections":[{"title":"데이터 불러오기","local":"load-the-data","sections":[],"depth":2},{"title":"데이터 전처리","local":"preprocess-the-data","sections":[{"title":"문서 이미지 전처리","local":"preprocessing-document-images","sections":[],"depth":3},{"title":"텍스트 데이터 전처리","local":"preprocessing-text-data","sections":[],"depth":3}],"depth":2},{"title":"평가","local":"evaluation","sections":[],"depth":2},{"title":"훈련","local":"train","sections":[],"depth":2},{"title":"추론","local":"inference","sections":[],"depth":2}],"depth":1}';
/**
 * Instance function handed to `un` by the component class below.
 *
 * Registers a callback through `yn` (imported from the scheduler chunk;
 * presumably Svelte's onMount - TODO confirm against that chunk). The callback
 * looks up the "fw" query parameter of the current URL and discards the
 * result, so it has no observable effect in this file.
 *
 * @param {*} Os - Unused in this function body.
 * @returns {Array} Always a new empty array.
 */
function kn(Os){return yn(()=>{new URLSearchParams(window.location.search).get("fw")}),[]}
/**
 * Page component, exported under the name `component`.
 *
 * Extends `Tn` (imported from the index chunk; presumably SvelteComponent -
 * TODO confirm) and delegates setup to `un`, passing the instance function
 * `kn`, the fragment factory `Vn`, the `Jn` helper imported from the scheduler
 * chunk, and an empty object as the last argument.
 */
class Nn extends Tn{constructor(U){super(),un(this,U,kn,Vn,Jn,{})}}
// Consumers import this module's class as `component`.
export{Nn as component};

Xet Storage Details

Size:
88.9 kB
·
Xet hash:
782442d6cb8d2d0c0e42e97ebe192472618e8a195b57c527cf4d0fd8c2910219

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.