Buckets:

hf-doc-build/doc-dev / transformers /main /ko /tasks /document_question_answering.html
rtrm's picture
download
raw
108 kB
<meta charset="utf-8" /><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;문서 질의 응답(Document Question Answering)&quot;,&quot;local&quot;:&quot;document_question_answering&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;데이터 불러오기&quot;,&quot;local&quot;:&quot;load-the-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;데이터 전처리&quot;,&quot;local&quot;:&quot;preprocess-the-data&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;문서 이미지 전처리&quot;,&quot;local&quot;:&quot;preprocessing-document-images&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;텍스트 데이터 전처리&quot;,&quot;local&quot;:&quot;preprocessing-text-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;평가&quot;,&quot;local&quot;:&quot;evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;훈련&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;추론&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}">
<link href="/docs/transformers/main/ko/_app/immutable/assets/0.e3b0c442.css" rel="modulepreload">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/start.9aa88961.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/scheduler.9bc65507.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/singletons.9eec45c3.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.3b203c72.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/paths.566078f7.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/entry/app.84fb67c3.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/index.707bf1b6.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/0.1c99376b.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/each.e59479a4.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/nodes/63.de1521a8.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/Tip.c2ecdbf4.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/CodeBlock.54a9f38d.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/DocNotebookDropdown.41f65cb5.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/globals.7f7f1b26.js">
<link rel="modulepreload" href="/docs/transformers/main/ko/_app/immutable/chunks/EditOnGithub.922df6ba.js"><!-- HEAD_svelte-u9bgzb_START --><meta name="hf:doc:metadata" content="{&quot;title&quot;:&quot;문서 질의 응답(Document Question Answering)&quot;,&quot;local&quot;:&quot;document_question_answering&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;데이터 불러오기&quot;,&quot;local&quot;:&quot;load-the-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;데이터 전처리&quot;,&quot;local&quot;:&quot;preprocess-the-data&quot;,&quot;sections&quot;:[{&quot;title&quot;:&quot;문서 이미지 전처리&quot;,&quot;local&quot;:&quot;preprocessing-document-images&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3},{&quot;title&quot;:&quot;텍스트 데이터 전처리&quot;,&quot;local&quot;:&quot;preprocessing-text-data&quot;,&quot;sections&quot;:[],&quot;depth&quot;:3}],&quot;depth&quot;:2},{&quot;title&quot;:&quot;평가&quot;,&quot;local&quot;:&quot;evaluation&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;훈련&quot;,&quot;local&quot;:&quot;train&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2},{&quot;title&quot;:&quot;추론&quot;,&quot;local&quot;:&quot;inference&quot;,&quot;sections&quot;:[],&quot;depth&quot;:2}],&quot;depth&quot;:1}"><!-- HEAD_svelte-u9bgzb_END --> <p></p> <h1 class="relative group"><a id="document_question_answering" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#document_question_answering"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>문서 질의 응답(Document Question Answering)</span></h1> <div class="flex space-x-1 absolute z-10 right-0 top-0"> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Colab" class="!m-0" src="https://colab.research.google.com/assets/colab-badge.svg"> </button> </div> <div class="relative colab-dropdown "> <button class=" " type="button"> <img alt="Open In Studio Lab" class="!m-0" src="https://studiolab.sagemaker.aws/studiolab.svg"> </button> </div></div> <p data-svelte-h="svelte-oqnn9h">문서 시각적 질의 응답(Document Visual Question Answering)이라고도 하는
문서 질의 응답(Document Question Answering)은 문서 이미지에 대한 질문에 답변을 주는 태스크입니다.
이 태스크를 지원하는 모델의 입력은 일반적으로 이미지와 질문의 조합이고, 출력은 자연어로 된 답변입니다. 이러한 모델은 텍스트, 단어의 위치(바운딩 박스), 이미지 등 다양한 모달리티를 활용합니다.</p> <p data-svelte-h="svelte-k9bbb9">이 가이드는 다음 내용을 설명합니다:</p> <ul data-svelte-h="svelte-1t9y1pd"><li><a href="https://huggingface.co/datasets/nielsr/docvqa_1200_examples_donut" rel="nofollow">DocVQA dataset</a>을 사용해 <a href="../model_doc/layoutlmv2">LayoutLMv2</a> 미세 조정하기</li> <li>추론을 위해 미세 조정된 모델을 사용하기</li></ul> <div class="course-tip bg-gradient-to-br dark:bg-gradient-to-r before:border-green-500 dark:before:border-green-800 from-green-50 dark:from-gray-900 to-white dark:to-gray-950 border border-green-50 text-green-700 dark:text-gray-400"><p data-svelte-h="svelte-17cmma8">이 작업과 호환되는 모든 아키텍처와 체크포인트를 보려면 <a href="https://huggingface.co/tasks/image-to-text" rel="nofollow">작업 페이지</a>를 확인하는 것이 좋습니다.</p></div> <p data-svelte-h="svelte-1t45j8o">LayoutLMv2는 토큰의 마지막 은닉층 위에 질의 응답 헤드를 추가해 답변의 시작 토큰과 끝 토큰의 위치를 예측함으로써 문서 질의 응답 태스크를 해결합니다. 즉, 문맥이 주어졌을 때 질문에 답하는 정보를 추출하는 추출형 질의 응답(Extractive question answering)으로 문제를 처리합니다.
문맥은 OCR 엔진의 출력에서 가져오며, 여기서는 Google의 Tesseract를 사용합니다.</p> <p data-svelte-h="svelte-ddzvb0">시작하기 전에 필요한 라이브러리가 모두 설치되어 있는지 확인하세요. LayoutLMv2는 detectron2, torchvision 및 테서랙트를 필요로 합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install -q transformers datasets<!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->pip install <span class="hljs-string">&#x27;git+https://github.com/facebookresearch/detectron2.git&#x27;</span>
pip install torchvision<!-- HTML_TAG_END --></pre></div> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START -->sudo apt install tesseract-ocr
pip install -q pytesseract<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1uvmu64">필요한 라이브러리들을 모두 설치한 후 런타임을 다시 시작합니다.</p> <p data-svelte-h="svelte-xyvcw8">커뮤니티에 당신의 모델을 공유하는 것을 권장합니다. Hugging Face 계정에 로그인해서 모델을 🤗 Hub에 업로드하세요.
프롬프트가 실행되면, 로그인을 위해 토큰을 입력하세요:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> huggingface_hub <span class="hljs-keyword">import</span> notebook_login
<span class="hljs-meta">&gt;&gt;&gt; </span>notebook_login()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-13bifrs">몇 가지 전역 변수를 정의해 보겠습니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>model_checkpoint = <span class="hljs-string">&quot;microsoft/layoutlmv2-base-uncased&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>batch_size = <span class="hljs-number">4</span><!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="load-the-data" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#load-the-data"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>데이터 불러오기</span></h2> <p data-svelte-h="svelte-12y9xdp">이 가이드에서는 🤗 Hub에서 찾을 수 있는 전처리된 DocVQA의 작은 샘플을 사용합니다.
DocVQA의 전체 데이터 세트를 사용하고 싶다면, <a href="https://rrc.cvc.uab.es/?ch=17" rel="nofollow">DocVQA homepage</a>에 가입 후 다운로드 할 수 있습니다. 전체 데이터 세트를 다운로드 했다면, 이 가이드를 계속 진행하기 위해 <a href="https://huggingface.co/docs/datasets/loading#local-and-remote-files" rel="nofollow">🤗 dataset에 파일을 가져오는 방법</a>을 확인하세요.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> datasets <span class="hljs-keyword">import</span> load_dataset
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset = load_dataset(<span class="hljs-string">&quot;nielsr/docvqa_1200_examples&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>dataset
DatasetDict({
train: Dataset({
features: [<span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;query&#x27;</span>, <span class="hljs-string">&#x27;answers&#x27;</span>, <span class="hljs-string">&#x27;words&#x27;</span>, <span class="hljs-string">&#x27;bounding_boxes&#x27;</span>, <span class="hljs-string">&#x27;answer&#x27;</span>],
num_rows: <span class="hljs-number">1000</span>
})
test: Dataset({
features: [<span class="hljs-string">&#x27;id&#x27;</span>, <span class="hljs-string">&#x27;image&#x27;</span>, <span class="hljs-string">&#x27;query&#x27;</span>, <span class="hljs-string">&#x27;answers&#x27;</span>, <span class="hljs-string">&#x27;words&#x27;</span>, <span class="hljs-string">&#x27;bounding_boxes&#x27;</span>, <span class="hljs-string">&#x27;answer&#x27;</span>],
num_rows: <span class="hljs-number">200</span>
})
})<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-9rg4tz">보시다시피, 데이터 세트는 이미 훈련 세트와 테스트 세트로 나누어져 있습니다. 무작위로 예제를 살펴보면서 특성을 확인해보세요.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset[<span class="hljs-string">&quot;train&quot;</span>].features<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zq0ej4">각 필드가 나타내는 내용은 다음과 같습니다:</p> <ul data-svelte-h="svelte-g2ws24"><li><code>id</code>: 예제의 id</li> <li><code>image</code>: 문서 이미지를 포함하는 PIL.Image.Image 객체</li> <li><code>query</code>: 질문 문자열 - 여러 언어의 자연어로 된 질문</li> <li><code>answers</code>: 사람이 주석을 단 정답 리스트</li> <li><code>words</code> and <code>bounding_boxes</code>: OCR의 결과값들이며 이 가이드에서는 사용하지 않을 예정</li> <li><code>answer</code>: 다른 모델과 일치하는 답변이며 이 가이드에서는 사용하지 않을 예정</li></ul> <p data-svelte-h="svelte-xeapog">영어로 된 질문만 남기고 다른 모델에 대한 예측을 포함하는 <code>answer</code> 특성을 삭제하겠습니다.
그리고 주석 작성자가 제공한 데이터 세트에서 첫 번째 답변을 가져옵니다. 또는 무작위로 샘플을 추출할 수도 있습니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = dataset.<span class="hljs-built_in">map</span>(<span class="hljs-keyword">lambda</span> example: {<span class="hljs-string">&quot;question&quot;</span>: example[<span class="hljs-string">&quot;query&quot;</span>][<span class="hljs-string">&quot;en&quot;</span>]}, remove_columns=[<span class="hljs-string">&quot;query&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> <span class="hljs-keyword">lambda</span> example: {<span class="hljs-string">&quot;answer&quot;</span>: example[<span class="hljs-string">&quot;answers&quot;</span>][<span class="hljs-number">0</span>]}, remove_columns=[<span class="hljs-string">&quot;answer&quot;</span>, <span class="hljs-string">&quot;answers&quot;</span>]
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-m4zkx9">이 가이드에서 사용하는 LayoutLMv2 체크포인트는 <code>max_position_embeddings = 512</code>로 훈련되었습니다(이 정보는 <a href="https://huggingface.co/microsoft/layoutlmv2-base-uncased/blob/main/config.json#L18" rel="nofollow">체크포인트의 <code>config.json</code> 파일</a>에서 확인할 수 있습니다).
바로 예제를 잘라낼 수도 있지만, 긴 문서의 끝에 답변이 있어 잘리는 상황을 피하기 위해 여기서는 임베딩이 512보다 길어질 가능성이 있는 몇 가지 예제를 제거하겠습니다.
데이터 세트에 있는 대부분의 문서가 긴 경우 슬라이딩 윈도우 방법을 사용할 수 있습니다 - 자세한 내용을 확인하고 싶으면 이 <a href="https://github.com/huggingface/notebooks/blob/main/examples/question_answering.ipynb" rel="nofollow">노트북</a>을 확인하세요.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.<span class="hljs-built_in">filter</span>(<span class="hljs-keyword">lambda</span> x: <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;words&quot;</span>]) + <span class="hljs-built_in">len</span>(x[<span class="hljs-string">&quot;question&quot;</span>].split()) &lt; <span class="hljs-number">512</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1xnig6m">이 시점에서 이 데이터 세트의 OCR 특성도 제거해 보겠습니다. OCR 특성은 다른 모델을 미세 조정하기 위한 것으로, 이 가이드에서 사용하는 모델의 입력 요구 사항과 일치하지 않기 때문에 이 특성을 사용하기 위해서는 일부 처리가 필요합니다.
대신, 원본 데이터에 <code>LayoutLMv2Processor</code>를 사용하여 OCR 및 토큰화를 모두 수행할 수 있습니다.
이렇게 하면 모델이 요구하는 입력을 얻을 수 있습니다.
이미지를 수동으로 처리하려면, <a href="../model_doc/layoutlmv2"><code>LayoutLMv2</code> model documentation</a>에서 모델이 요구하는 입력 포맷을 확인해보세요.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.remove_columns(<span class="hljs-string">&quot;words&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset = updated_dataset.remove_columns(<span class="hljs-string">&quot;bounding_boxes&quot;</span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-lp4iy7">마지막으로, 데이터 탐색을 완료하기 위해 이미지 예시를 살펴봅시다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>updated_dataset[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">11</span>][<span class="hljs-string">&quot;image&quot;</span>]<!-- HTML_TAG_END --></pre></div> <div class="flex justify-center" data-svelte-h="svelte-q63tj1"><img src="https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/docvqa_example.jpg" alt="DocVQA Image Example"></div> <h2 class="relative group"><a id="preprocess-the-data" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocess-the-data"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>데이터 전처리</span></h2> <p data-svelte-h="svelte-1y93l9d">문서 질의 응답 태스크는 멀티모달 태스크이며, 각 모달리티의 입력이 모델의 요구에 맞게 전처리 되었는지 확인해야 합니다.
이미지 데이터를 처리할 수 있는 이미지 프로세서와 텍스트 데이터를 인코딩할 수 있는 토크나이저를 결합한 <code>LayoutLMv2Processor</code>를 가져오는 것부터 시작해 보겠습니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="preprocessing-document-images" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocessing-document-images"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>문서 이미지 전처리</span></h3> <p data-svelte-h="svelte-1kcsbqg">먼저, 프로세서의 <code>image_processor</code>를 사용해 모델에 대한 문서 이미지를 준비해 보겠습니다.
기본값으로, 이미지 프로세서는 이미지 크기를 224x224로 조정하고 색상 채널의 순서가 올바른지 확인한 후 단어와 정규화된 바운딩 박스를 얻기 위해 테서랙트를 사용해 OCR를 적용합니다.
이 튜토리얼에서 우리가 필요한 것과 기본값은 완전히 동일합니다. 이미지 배치에 기본 이미지 처리를 적용하고 OCR의 결과를 변환하는 함수를 작성합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>image_processor = processor.image_processor
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">get_ocr_words_and_boxes</span>(<span class="hljs-params">examples</span>):
<span class="hljs-meta">... </span> images = [image.convert(<span class="hljs-string">&quot;RGB&quot;</span>) <span class="hljs-keyword">for</span> image <span class="hljs-keyword">in</span> examples[<span class="hljs-string">&quot;image&quot;</span>]]
<span class="hljs-meta">... </span> encoded_inputs = image_processor(images)
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;image&quot;</span>] = encoded_inputs.pixel_values
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;words&quot;</span>] = encoded_inputs.words
<span class="hljs-meta">... </span> examples[<span class="hljs-string">&quot;boxes&quot;</span>] = encoded_inputs.boxes
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> examples<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1rksgl2">이 전처리를 데이터 세트 전체에 빠르게 적용하려면 <code>map</code>를 사용하세요.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>dataset_with_ocr = updated_dataset.<span class="hljs-built_in">map</span>(get_ocr_words_and_boxes, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>)<!-- HTML_TAG_END --></pre></div> <h3 class="relative group"><a id="preprocessing-text-data" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#preprocessing-text-data"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>텍스트 데이터 전처리</span></h3> <p data-svelte-h="svelte-1l26czp">이미지에 OCR을 적용했으면 데이터 세트의 텍스트 부분을 모델에 맞게 인코딩해야 합니다.
이 인코딩에는 이전 단계에서 가져온 단어와 박스를 토큰 수준의 <code>input_ids</code>, <code>attention_mask</code>, <code>token_type_ids</code><code>bbox</code>로 변환하는 작업이 포함됩니다.
텍스트를 전처리하려면 프로세서의 <code>tokenizer</code>가 필요합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer = processor.tokenizer<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1yiqpne">위에서 언급한 전처리 외에도 모델을 위해 레이블을 추가해야 합니다. 🤗 Transformers의 <code>xxxForQuestionAnswering</code> 모델의 경우, 레이블은 <code>start_positions</code><code>end_positions</code>로 구성되며 어떤 토큰이 답변의 시작과 끝에 있는지를 나타냅니다.</p> <p data-svelte-h="svelte-rkm9nf">레이블 추가를 위해서, 먼저 더 큰 리스트(단어 리스트)에서 하위 리스트(단어로 분할된 답변)을 찾을 수 있는 헬퍼 함수를 정의합니다.</p> <p data-svelte-h="svelte-1pxhw5r">이 함수는 <code>words_list</code><code>answer_list</code>, 이렇게 두 리스트를 입력으로 받습니다.
그런 다음 <code>words_list</code>를 반복하여 <code>words_list</code>의 현재 단어(words_list[i])가 <code>answer_list</code>의 첫 번째 단어(answer_list[0])와 같은지,
현재 단어에서 시작해 <code>answer_list</code>와 같은 길이만큼의 <code>words_list</code>의 하위 리스트가 <code>answer_list</code>와 일치하는지 확인합니다.
이 조건이 참이라면 일치하는 항목을 발견했음을 의미하며, 함수는 일치 항목, 시작 인덱스(idx) 및 종료 인덱스(idx + len(answer_list) - 1)를 기록합니다. 일치하는 항목이 두 개 이상 발견되면 함수는 첫 번째 항목만 반환합니다. 일치하는 항목이 없다면 함수는 (<code>None</code>, 0, 0)을 반환합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">subfinder</span>(<span class="hljs-params">words_list, answer_list</span>):
<span class="hljs-meta">... </span> matches = []
<span class="hljs-meta">... </span> start_indices = []
<span class="hljs-meta">... </span> end_indices = []
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> idx, i <span class="hljs-keyword">in</span> <span class="hljs-built_in">enumerate</span>(<span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(words_list))):
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> words_list[i] == answer_list[<span class="hljs-number">0</span>] <span class="hljs-keyword">and</span> words_list[i : i + <span class="hljs-built_in">len</span>(answer_list)] == answer_list:
<span class="hljs-meta">... </span> matches.append(answer_list)
<span class="hljs-meta">... </span> start_indices.append(idx)
<span class="hljs-meta">... </span> end_indices.append(idx + <span class="hljs-built_in">len</span>(answer_list) - <span class="hljs-number">1</span>)
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> matches:
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> matches[<span class="hljs-number">0</span>], start_indices[<span class="hljs-number">0</span>], end_indices[<span class="hljs-number">0</span>]
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> <span class="hljs-literal">None</span>, <span class="hljs-number">0</span>, <span class="hljs-number">0</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-mdy9jm">이 함수가 어떻게 정답의 위치를 찾는지 설명하기 위해 다음 예제에서 함수를 사용해 보겠습니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>example = dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>][<span class="hljs-number">1</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>words = [word.lower() <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> example[<span class="hljs-string">&quot;words&quot;</span>]]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">match</span>, word_idx_start, word_idx_end = subfinder(words, example[<span class="hljs-string">&quot;answer&quot;</span>].lower().split())
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Question: &quot;</span>, example[<span class="hljs-string">&quot;question&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Words:&quot;</span>, words)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;Answer: &quot;</span>, example[<span class="hljs-string">&quot;answer&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;start_index&quot;</span>, word_idx_start)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(<span class="hljs-string">&quot;end_index&quot;</span>, word_idx_end)
Question: Who <span class="hljs-keyword">is</span> <span class="hljs-keyword">in</span> cc <span class="hljs-keyword">in</span> this letter?
Words: [<span class="hljs-string">&#x27;wie&#x27;</span>, <span class="hljs-string">&#x27;baw&#x27;</span>, <span class="hljs-string">&#x27;brown&#x27;</span>, <span class="hljs-string">&#x27;&amp;&#x27;</span>, <span class="hljs-string">&#x27;williamson&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;corporation&#x27;</span>, <span class="hljs-string">&#x27;research&#x27;</span>, <span class="hljs-string">&#x27;&amp;&#x27;</span>, <span class="hljs-string">&#x27;development&#x27;</span>, <span class="hljs-string">&#x27;internal&#x27;</span>, <span class="hljs-string">&#x27;correspondence&#x27;</span>, <span class="hljs-string">&#x27;to:&#x27;</span>, <span class="hljs-string">&#x27;r.&#x27;</span>, <span class="hljs-string">&#x27;h.&#x27;</span>, <span class="hljs-string">&#x27;honeycutt&#x27;</span>, <span class="hljs-string">&#x27;ce:&#x27;</span>, <span class="hljs-string">&#x27;t.f.&#x27;</span>, <span class="hljs-string">&#x27;riehl&#x27;</span>, <span class="hljs-string">&#x27;from:&#x27;</span>, <span class="hljs-string">&#x27;.&#x27;</span>, <span class="hljs-string">&#x27;c.j.&#x27;</span>, <span class="hljs-string">&#x27;cook&#x27;</span>, <span class="hljs-string">&#x27;date:&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;8,&#x27;</span>, <span class="hljs-string">&#x27;1995&#x27;</span>, <span class="hljs-string">&#x27;subject:&#x27;</span>, <span class="hljs-string">&#x27;review&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;existing&#x27;</span>, <span class="hljs-string">&#x27;brainstorming&#x27;</span>, <span class="hljs-string">&#x27;ideas/483&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;major&#x27;</span>, <span class="hljs-string">&#x27;function&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;product&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;graup&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;develop&#x27;</span>, <span class="hljs-string">&#x27;marketable&#x27;</span>, <span class="hljs-string">&#x27;nove!&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;would&#x27;</span>, <span class="hljs-string">&#x27;be&#x27;</span>, <span class="hljs-string">&#x27;profitable&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;manufacture&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;sell.&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;defined&#x27;</span>, <span class="hljs-string">&#x27;as:&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;kind,&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;anything&#x27;</span>, <span class="hljs-string">&#x27;seen&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;known&#x27;</span>, <span class="hljs-string">&#x27;before.&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;defined&#x27;</span>, <span class="hljs-string">&#x27;as:&#x27;</span>, <span class="hljs-string">&#x27;something&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;introduced;&#x27;</span>, <span class="hljs-string">&#x27;act&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;innovating;&#x27;</span>, <span class="hljs-string">&#x27;introduction&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;new&#x27;</span>, <span class="hljs-string">&#x27;things&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;methods.&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;incorporate&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;latest&#x27;</span>, <span class="hljs-string">&#x27;technologies,&#x27;</span>, <span class="hljs-string">&#x27;materials&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;know-how&#x27;</span>, <span class="hljs-string">&#x27;available&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;give&#x27;</span>, <span class="hljs-string">&#x27;then&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;unique&#x27;</span>, <span class="hljs-string">&#x27;taste&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;look.&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;first&#x27;</span>, <span class="hljs-string">&#x27;task&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;product&#x27;</span>, <span class="hljs-string">&#x27;innovation&#x27;</span>, <span class="hljs-string">&#x27;group&#x27;</span>, <span class="hljs-string">&#x27;was&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;assemble,&#x27;</span>, <span class="hljs-string">&#x27;review&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;categorize&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;list&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;existing&#x27;</span>, <span class="hljs-string">&#x27;brainstorming&#x27;</span>, <span class="hljs-string">&#x27;ideas.&#x27;</span>, <span class="hljs-string">&#x27;ideas&#x27;</span>, <span class="hljs-string">&#x27;were&#x27;</span>, <span class="hljs-string">&#x27;grouped&#x27;</span>, <span class="hljs-string">&#x27;into&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;major&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;labeled&#x27;</span>, <span class="hljs-string">&#x27;appearance&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;taste/aroma.&#x27;</span>, <span class="hljs-string">&#x27;these&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;are&#x27;</span>, <span class="hljs-string">&#x27;used&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;may&#x27;</span>, <span class="hljs-string">&#x27;differ&#x27;</span>, <span class="hljs-string">&#x27;from&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;visual&#x27;</span>, <span class="hljs-string">&#x27;and/or&#x27;</span>, <span class="hljs-string">&#x27;taste/aroma&#x27;</span>, <span class="hljs-string">&#x27;point&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;view&#x27;</span>, <span class="hljs-string">&#x27;compared&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;canventional&#x27;</span>, <span class="hljs-string">&#x27;cigarettes.&#x27;</span>, <span class="hljs-string">&#x27;other&#x27;</span>, <span class="hljs-string">&#x27;categories&#x27;</span>, <span class="hljs-string">&#x27;include&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;combination&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;the&#x27;</span>, <span class="hljs-string">&#x27;above,&#x27;</span>, <span class="hljs-string">&#x27;filters,&#x27;</span>, <span class="hljs-string">&#x27;packaging&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;brand&#x27;</span>, <span class="hljs-string">&#x27;extensions.&#x27;</span>, <span class="hljs-string">&#x27;appearance&#x27;</span>, <span class="hljs-string">&#x27;this&#x27;</span>, <span class="hljs-string">&#x27;category&#x27;</span>, <span class="hljs-string">&#x27;is&#x27;</span>, <span class="hljs-string">&#x27;used&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;novel&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;constructions&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;yield&#x27;</span>, <span class="hljs-string">&#x27;visually&#x27;</span>, <span class="hljs-string">&#x27;different&#x27;</span>, <span class="hljs-string">&#x27;products&#x27;</span>, <span class="hljs-string">&#x27;with&#x27;</span>, <span class="hljs-string">&#x27;minimal&#x27;</span>, <span class="hljs-string">&#x27;changes&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;smoke&#x27;</span>, <span class="hljs-string">&#x27;chemistry&#x27;</span>, <span class="hljs-string">&#x27;two&#x27;</span>, <span class="hljs-string">&#x27;cigarettes&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;cne.&#x27;</span>, <span class="hljs-string">&#x27;emulti-plug&#x27;</span>, <span class="hljs-string">&#x27;te&#x27;</span>, <span class="hljs-string">&#x27;build&#x27;</span>, <span class="hljs-string">&#x27;yaur&#x27;</span>, <span class="hljs-string">&#x27;awn&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;eswitchable&#x27;</span>, <span class="hljs-string">&#x27;menthol&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;non&#x27;</span>, <span class="hljs-string">&#x27;menthol&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;*cigarettes&#x27;</span>, <span class="hljs-string">&#x27;with&#x27;</span>, <span class="hljs-string">&#x27;interspaced&#x27;</span>, <span class="hljs-string">&#x27;perforations&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;enable&#x27;</span>, <span class="hljs-string">&#x27;smoker&#x27;</span>, <span class="hljs-string">&#x27;to&#x27;</span>, <span class="hljs-string">&#x27;separate&#x27;</span>, <span class="hljs-string">&#x27;unburned&#x27;</span>, <span class="hljs-string">&#x27;section&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;future&#x27;</span>, <span class="hljs-string">&#x27;smoking.&#x27;</span>, <span class="hljs-string">&#x27;«short&#x27;</span>, <span class="hljs-string">&#x27;cigarette,&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;section&#x27;</span>, <span class="hljs-string">&#x27;30&#x27;</span>, <span class="hljs-string">&#x27;mm.&#x27;</span>, <span class="hljs-string">&#x27;«extremely&#x27;</span>, <span class="hljs-string">&#x27;fast&#x27;</span>, <span class="hljs-string">&#x27;buming&#x27;</span>, <span class="hljs-string">&#x27;cigarette.&#x27;</span>, <span class="hljs-string">&#x27;«novel&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;constructions&#x27;</span>, <span class="hljs-string">&#x27;that&#x27;</span>, <span class="hljs-string">&#x27;permit&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;significant&#x27;</span>, <span class="hljs-string">&#x27;reduction&#x27;</span>, <span class="hljs-string">&#x27;iretobacco&#x27;</span>, <span class="hljs-string">&#x27;weight&#x27;</span>, <span class="hljs-string">&#x27;while&#x27;</span>, <span class="hljs-string">&#x27;maintaining&#x27;</span>, <span class="hljs-string">&#x27;smoking&#x27;</span>, <span class="hljs-string">&#x27;mechanics&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;visual&#x27;</span>, <span class="hljs-string">&#x27;characteristics.&#x27;</span>, <span class="hljs-string">&#x27;higher&#x27;</span>, <span class="hljs-string">&#x27;basis&#x27;</span>, <span class="hljs-string">&#x27;weight&#x27;</span>, <span class="hljs-string">&#x27;paper:&#x27;</span>, <span class="hljs-string">&#x27;potential&#x27;</span>, <span class="hljs-string">&#x27;reduction&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;weight.&#x27;</span>, <span class="hljs-string">&#x27;«more&#x27;</span>, <span class="hljs-string">&#x27;rigid&#x27;</span>, <span class="hljs-string">&#x27;tobacco&#x27;</span>, <span class="hljs-string">&#x27;column;&#x27;</span>, <span class="hljs-string">&#x27;stiffing&#x27;</span>, <span class="hljs-string">&#x27;agent&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;tobacco;&#x27;</span>, <span class="hljs-string">&#x27;e.g.&#x27;</span>, <span class="hljs-string">&#x27;starch&#x27;</span>, <span class="hljs-string">&#x27;*colored&#x27;</span>, <span class="hljs-string">&#x27;tow&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;cigarette&#x27;</span>, <span class="hljs-string">&#x27;papers;&#x27;</span>, <span class="hljs-string">&#x27;seasonal&#x27;</span>, <span class="hljs-string">&#x27;promotions,&#x27;</span>, <span class="hljs-string">&#x27;e.g.&#x27;</span>, <span class="hljs-string">&#x27;pastel&#x27;</span>, <span class="hljs-string">&#x27;colored&#x27;</span>, <span class="hljs-string">&#x27;cigarettes&#x27;</span>, <span class="hljs-string">&#x27;for&#x27;</span>, <span class="hljs-string">&#x27;easter&#x27;</span>, <span class="hljs-string">&#x27;or&#x27;</span>, <span class="hljs-string">&#x27;in&#x27;</span>, <span class="hljs-string">&#x27;an&#x27;</span>, <span class="hljs-string">&#x27;ebony&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;ivory&#x27;</span>, <span class="hljs-string">&#x27;brand&#x27;</span>, <span class="hljs-string">&#x27;containing&#x27;</span>, <span class="hljs-string">&#x27;a&#x27;</span>, <span class="hljs-string">&#x27;mixture&#x27;</span>, <span class="hljs-string">&#x27;of&#x27;</span>, <span class="hljs-string">&#x27;all&#x27;</span>, <span class="hljs-string">&#x27;black&#x27;</span>, <span class="hljs-string">&#x27;(black&#x27;</span>, <span class="hljs-string">&#x27;paper&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;tow)&#x27;</span>, <span class="hljs-string">&#x27;and&#x27;</span>, <span class="hljs-string">&#x27;ail&#x27;</span>, <span class="hljs-string">&#x27;white&#x27;</span>, <span class="hljs-string">&#x27;cigarettes.&#x27;</span>, <span class="hljs-string">&#x27;499150498&#x27;</span>]
Answer: T.F. Riehl
start_index <span class="hljs-number">17</span>
end_index <span class="hljs-number">18</span><!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gff4qz">한편, 위 예제가 인코딩되면 다음과 같이 표시됩니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoding = tokenizer(example[<span class="hljs-string">&quot;question&quot;</span>], example[<span class="hljs-string">&quot;words&quot;</span>], example[<span class="hljs-string">&quot;boxes&quot;</span>])
<span class="hljs-meta">&gt;&gt;&gt; </span>tokenizer.decode(encoding[<span class="hljs-string">&quot;input_ids&quot;</span>])
[CLS] who <span class="hljs-keyword">is</span> <span class="hljs-keyword">in</span> cc <span class="hljs-keyword">in</span> this letter? [SEP] wie baw brown &amp; williamson tobacco corporation research &amp; development ...<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-zzn8kk">이제 인코딩된 입력에서 정답의 위치를 찾아야 합니다.</p> <ul data-svelte-h="svelte-f0s3pn"><li><code>token_type_ids</code>는 어떤 토큰이 질문에 속하는지, 그리고 어떤 토큰이 문서의 단어에 포함되는지를 알려줍니다.</li> <li><code>tokenizer.cls_token_id</code> 입력의 시작 부분에 있는 특수 토큰을 찾는 데 도움을 줍니다.</li> <li><code>word_ids</code>는 원본 <code>words</code>에서 찾은 답변을 전체 인코딩된 입력의 동일한 답과 일치시키고 인코딩된 입력에서 답변의 시작/끝 위치를 결정합니다.</li></ul> <p data-svelte-h="svelte-1h1oq5v">위 내용들을 염두에 두고 데이터 세트 예제의 배치를 인코딩하는 함수를 만들어 보겠습니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">def</span> <span class="hljs-title function_">encode_dataset</span>(<span class="hljs-params">examples, max_length=<span class="hljs-number">512</span></span>):
<span class="hljs-meta">... </span> questions = examples[<span class="hljs-string">&quot;question&quot;</span>]
<span class="hljs-meta">... </span> words = examples[<span class="hljs-string">&quot;words&quot;</span>]
<span class="hljs-meta">... </span> boxes = examples[<span class="hljs-string">&quot;boxes&quot;</span>]
<span class="hljs-meta">... </span> answers = examples[<span class="hljs-string">&quot;answer&quot;</span>]
<span class="hljs-meta">... </span> <span class="hljs-comment"># 예제 배치를 인코딩하고 start_positions와 end_positions를 초기화합니다</span>
<span class="hljs-meta">... </span> encoding = tokenizer(questions, words, boxes, max_length=max_length, padding=<span class="hljs-string">&quot;max_length&quot;</span>, truncation=<span class="hljs-literal">True</span>)
<span class="hljs-meta">... </span> start_positions = []
<span class="hljs-meta">... </span> end_positions = []
<span class="hljs-meta">... </span> <span class="hljs-comment"># 배치의 예제를 반복합니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> <span class="hljs-built_in">range</span>(<span class="hljs-built_in">len</span>(questions)):
<span class="hljs-meta">... </span> cls_index = encoding[<span class="hljs-string">&quot;input_ids&quot;</span>][i].index(tokenizer.cls_token_id)
<span class="hljs-meta">... </span> <span class="hljs-comment"># 예제의 words에서 답변의 위치를 찾습니다</span>
<span class="hljs-meta">... </span> words_example = [word.lower() <span class="hljs-keyword">for</span> word <span class="hljs-keyword">in</span> words[i]]
<span class="hljs-meta">... </span> answer = answers[i]
<span class="hljs-meta">... </span> <span class="hljs-keyword">match</span>, word_idx_start, word_idx_end = subfinder(words_example, answer.lower().split())
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-keyword">match</span>:
<span class="hljs-meta">... </span> <span class="hljs-comment"># 일치하는 항목을 발견하면, `token_type_ids`를 사용해 인코딩에서 단어가 시작하는 위치를 찾습니다</span>
<span class="hljs-meta">... </span> token_type_ids = encoding[<span class="hljs-string">&quot;token_type_ids&quot;</span>][i]
<span class="hljs-meta">... </span> token_start_index = <span class="hljs-number">0</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">while</span> token_type_ids[token_start_index] != <span class="hljs-number">1</span>:
<span class="hljs-meta">... </span> token_start_index += <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> token_end_index = <span class="hljs-built_in">len</span>(encoding[<span class="hljs-string">&quot;input_ids&quot;</span>][i]) - <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">while</span> token_type_ids[token_end_index] != <span class="hljs-number">1</span>:
<span class="hljs-meta">... </span> token_end_index -= <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> word_ids = encoding.word_ids(i)[token_start_index : token_end_index + <span class="hljs-number">1</span>]
<span class="hljs-meta">... </span> start_position = cls_index
<span class="hljs-meta">... </span> end_position = cls_index
<span class="hljs-meta">... </span> <span class="hljs-comment"># words의 답변 위치와 일치할 때까지 word_ids를 반복하고 `token_start_index`를 늘립니다</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># 일치하면 `token_start_index`를 인코딩에서 답변의 `start_position`으로 저장합니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> <span class="hljs-built_in">id</span> <span class="hljs-keyword">in</span> word_ids:
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-built_in">id</span> == word_idx_start:
<span class="hljs-meta">... </span> start_position = token_start_index
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> token_start_index += <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> <span class="hljs-comment"># 비슷하게, 끝에서 시작해 `word_ids`를 반복하며 답변의 `end_position`을 찾습니다</span>
<span class="hljs-meta">... </span> <span class="hljs-keyword">for</span> <span class="hljs-built_in">id</span> <span class="hljs-keyword">in</span> word_ids[::-<span class="hljs-number">1</span>]:
<span class="hljs-meta">... </span> <span class="hljs-keyword">if</span> <span class="hljs-built_in">id</span> == word_idx_end:
<span class="hljs-meta">... </span> end_position = token_end_index
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> token_end_index -= <span class="hljs-number">1</span>
<span class="hljs-meta">... </span> start_positions.append(start_position)
<span class="hljs-meta">... </span> end_positions.append(end_position)
<span class="hljs-meta">... </span> <span class="hljs-keyword">else</span>:
<span class="hljs-meta">... </span> start_positions.append(cls_index)
<span class="hljs-meta">... </span> end_positions.append(cls_index)
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;image&quot;</span>] = examples[<span class="hljs-string">&quot;image&quot;</span>]
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;start_positions&quot;</span>] = start_positions
<span class="hljs-meta">... </span> encoding[<span class="hljs-string">&quot;end_positions&quot;</span>] = end_positions
<span class="hljs-meta">... </span> <span class="hljs-keyword">return</span> encoding<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-10ovv0a">이제 이 전처리 함수가 있으니 전체 데이터 세트를 인코딩할 수 있습니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoded_train_dataset = dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>].<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> encode_dataset, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>, remove_columns=dataset_with_ocr[<span class="hljs-string">&quot;train&quot;</span>].column_names
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>encoded_test_dataset = dataset_with_ocr[<span class="hljs-string">&quot;test&quot;</span>].<span class="hljs-built_in">map</span>(
<span class="hljs-meta">... </span> encode_dataset, batched=<span class="hljs-literal">True</span>, batch_size=<span class="hljs-number">2</span>, remove_columns=dataset_with_ocr[<span class="hljs-string">&quot;test&quot;</span>].column_names
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1cmvv07">인코딩된 데이터 세트의 특성이 어떻게 생겼는지 확인해 보겠습니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>encoded_train_dataset.features
{<span class="hljs-string">&#x27;image&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;uint8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;input_ids&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int32&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;token_type_ids&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;attention_mask&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int8&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;bbox&#x27;</span>: <span class="hljs-type">Sequence</span>(feature=<span class="hljs-type">Sequence</span>(feature=Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>), length=-<span class="hljs-number">1</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;start_positions&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>),
<span class="hljs-string">&#x27;end_positions&#x27;</span>: Value(dtype=<span class="hljs-string">&#x27;int64&#x27;</span>, <span class="hljs-built_in">id</span>=<span class="hljs-literal">None</span>)}<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="evaluation" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#evaluation"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>평가</span></h2> <p data-svelte-h="svelte-8iuplh">문서 질의 응답을 평가하려면 상당한 양의 후처리가 필요합니다. 시간이 너무 많이 걸리지 않도록 이 가이드에서는 평가 단계를 생략합니다.
<code>Trainer</code>가 훈련 과정에서 평가 손실(evaluation loss)을 계속 계산하기 때문에 모델의 성능을 대략적으로 알 수 있습니다.
추출적(Extractive) 질의 응답은 보통 F1/exact match 방법을 사용해 평가됩니다.
직접 구현해보고 싶으시다면, Hugging Face course의 <a href="https://huggingface.co/course/chapter7/7?fw=pt#postprocessing" rel="nofollow">Question Answering chapter</a>을 참고하세요.</p> <h2 class="relative group"><a id="train" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#train"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>훈련</span></h2> <p data-svelte-h="svelte-1r96dak">축하합니다! 이 가이드의 가장 어려운 부분을 성공적으로 처리했으니 이제 나만의 모델을 훈련할 준비가 되었습니다.
훈련은 다음과 같은 단계로 이루어져 있습니다:</p> <ul data-svelte-h="svelte-dhr9ud"><li>전처리에서의 동일한 체크포인트를 사용하기 위해 <code>AutoModelForDocumentQuestionAnswering</code>으로 모델을 가져옵니다.</li> <li><code>TrainingArguments</code>로 훈련 하이퍼파라미터를 정합니다.</li> <li>예제를 배치 처리하는 함수를 정의합니다. 여기서는 <code>DefaultDataCollator</code>가 적당합니다.</li> <li>모델, 데이터 세트, 데이터 콜레이터(Data collator)와 함께 <code>Trainer</code>에 훈련 인수들을 전달합니다.</li> <li><code>train()</code>을 호출해서 모델을 미세 조정합니다.</li></ul> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForDocumentQuestionAnswering
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForDocumentQuestionAnswering.from_pretrained(model_checkpoint)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1wyv6ig"><code>TrainingArguments</code>에서 <code>output_dir</code>을 사용하여 모델을 저장할 위치를 지정하고, 적절한 하이퍼파라미터를 설정합니다.
모델을 커뮤니티와 공유하려면 <code>push_to_hub</code><code>True</code>로 설정하세요 (모델을 업로드하려면 Hugging Face에 로그인해야 합니다).
이 경우 <code>output_dir</code>은 모델의 체크포인트를 푸시할 레포지토리의 이름이 됩니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> TrainingArguments
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-comment"># 본인의 레포지토리 ID로 바꾸세요</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>repo_id = <span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>
<span class="hljs-meta">&gt;&gt;&gt; </span>training_args = TrainingArguments(
<span class="hljs-meta">... </span> output_dir=repo_id,
<span class="hljs-meta">... </span> per_device_train_batch_size=<span class="hljs-number">4</span>,
<span class="hljs-meta">... </span> num_train_epochs=<span class="hljs-number">20</span>,
<span class="hljs-meta">... </span> save_steps=<span class="hljs-number">200</span>,
<span class="hljs-meta">... </span> logging_steps=<span class="hljs-number">50</span>,
<span class="hljs-meta">... </span> eval_strategy=<span class="hljs-string">&quot;steps&quot;</span>,
<span class="hljs-meta">... </span> learning_rate=<span class="hljs-number">5e-5</span>,
<span class="hljs-meta">... </span> save_total_limit=<span class="hljs-number">2</span>,
<span class="hljs-meta">... </span> remove_unused_columns=<span class="hljs-literal">False</span>,
<span class="hljs-meta">... </span> push_to_hub=<span class="hljs-literal">True</span>,
<span class="hljs-meta">... </span>)<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1hdarm6">간단한 데이터 콜레이터를 정의하여 예제를 함께 배치합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> DefaultDataCollator
<span class="hljs-meta">&gt;&gt;&gt; </span>data_collator = DefaultDataCollator()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1heolva">마지막으로, 모든 것을 한 곳에 모아 <code>train()</code>을 호출합니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> Trainer
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer = Trainer(
<span class="hljs-meta">... </span> model=model,
<span class="hljs-meta">... </span> args=training_args,
<span class="hljs-meta">... </span> data_collator=data_collator,
<span class="hljs-meta">... </span> train_dataset=encoded_train_dataset,
<span class="hljs-meta">... </span> eval_dataset=encoded_test_dataset,
<span class="hljs-meta">... </span> tokenizer=processor,
<span class="hljs-meta">... </span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.train()<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-1gum9w7">최종 모델을 🤗 Hub에 추가하려면, 모델 카드를 생성하고 <code>push_to_hub</code>를 호출합니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>trainer.create_model_card()
<span class="hljs-meta">&gt;&gt;&gt; </span>trainer.push_to_hub()<!-- HTML_TAG_END --></pre></div> <h2 class="relative group"><a id="inference" class="header-link block pr-1.5 text-lg no-hover:hidden with-hover:absolute with-hover:p-1.5 with-hover:opacity-0 with-hover:group-hover:opacity-100 with-hover:right-full" href="#inference"><span><svg class="" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 256"><path d="M167.594 88.393a8.001 8.001 0 0 1 0 11.314l-67.882 67.882a8 8 0 1 1-11.314-11.315l67.882-67.881a8.003 8.003 0 0 1 11.314 0zm-28.287 84.86l-28.284 28.284a40 40 0 0 1-56.567-56.567l28.284-28.284a8 8 0 0 0-11.315-11.315l-28.284 28.284a56 56 0 0 0 79.196 79.197l28.285-28.285a8 8 0 1 0-11.315-11.314zM212.852 43.14a56.002 56.002 0 0 0-79.196 0l-28.284 28.284a8 8 0 1 0 11.314 11.314l28.284-28.284a40 40 0 0 1 56.568 56.567l-28.285 28.285a8 8 0 0 0 11.315 11.314l28.284-28.284a56.065 56.065 0 0 0 0-79.196z" fill="currentColor"></path></svg></span></a> <span>추론</span></h2> <p data-svelte-h="svelte-1flysn8">이제 LayoutLMv2 모델을 미세 조정하고 🤗 Hub에 업로드했으니 추론에도 사용할 수 있습니다.
추론을 위해 미세 조정된 모델을 사용해 보는 가장 간단한 방법은 <code>Pipeline</code>을 사용하는 것 입니다.</p> <p data-svelte-h="svelte-1dcjn75">예를 들어 보겠습니다:</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span>example = dataset[<span class="hljs-string">&quot;test&quot;</span>][<span class="hljs-number">2</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>question = example[<span class="hljs-string">&quot;query&quot;</span>][<span class="hljs-string">&quot;en&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span>image = example[<span class="hljs-string">&quot;image&quot;</span>]
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(question)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-built_in">print</span>(example[<span class="hljs-string">&quot;answers&quot;</span>])
<span class="hljs-string">&#x27;Who is ‘presiding’ TRRF GENERAL SESSION (PART 1)?&#x27;</span>
[<span class="hljs-string">&#x27;TRRF Vice President&#x27;</span>, <span class="hljs-string">&#x27;lee a. waller&#x27;</span>]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-gaec9h">그 다음, 모델로 문서 질의 응답을 하기 위해 파이프라인을 인스턴스화하고 이미지 + 질문 조합을 전달합니다.</p> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> pipeline
<span class="hljs-meta">&gt;&gt;&gt; </span>qa_pipeline = pipeline(<span class="hljs-string">&quot;document-question-answering&quot;</span>, model=<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>qa_pipeline(image, question)
[{<span class="hljs-string">&#x27;score&#x27;</span>: <span class="hljs-number">0.9949808120727539</span>,
<span class="hljs-string">&#x27;answer&#x27;</span>: <span class="hljs-string">&#x27;Lee A. Waller&#x27;</span>,
<span class="hljs-string">&#x27;start&#x27;</span>: <span class="hljs-number">55</span>,
<span class="hljs-string">&#x27;end&#x27;</span>: <span class="hljs-number">57</span>}]<!-- HTML_TAG_END --></pre></div> <p data-svelte-h="svelte-4epvs7">원한다면 파이프라인의 결과를 수동으로 복제할 수도 있습니다:</p> <ol data-svelte-h="svelte-10zqnq5"><li>이미지와 질문을 가져와 모델의 프로세서를 사용해 모델에 맞게 준비합니다.</li> <li>모델을 통해 결과 또는 전처리를 전달합니다.</li> <li>모델은 어떤 토큰이 답변의 시작에 있는지, 어떤 토큰이 답변이 끝에 있는지를 나타내는 <code>start_logits</code><code>end_logits</code>를 반환합니다. 둘 다 (batch_size, sequence_length) 형태를 갖습니다.</li> <li><code>start_logits</code><code>end_logits</code>의 마지막 차원을 최대로 만드는 값을 찾아 예상 <code>start_idx</code><code>end_idx</code>를 얻습니다.</li> <li>토크나이저로 답변을 디코딩합니다.</li></ol> <div class="code-block relative"><div class="absolute top-2.5 right-4"><button class="inline-flex items-center relative text-sm focus:text-green-500 cursor-pointer focus:outline-none transition duration-200 ease-in-out opacity-0 mx-0.5 text-gray-600 " title="code excerpt" type="button"><svg class="" xmlns="http://www.w3.org/2000/svg" aria-hidden="true" fill="currentColor" focusable="false" role="img" width="1em" height="1em" preserveAspectRatio="xMidYMid meet" viewBox="0 0 32 32"><path d="M28,10V28H10V10H28m0-2H10a2,2,0,0,0-2,2V28a2,2,0,0,0,2,2H28a2,2,0,0,0,2-2V10a2,2,0,0,0-2-2Z" transform="translate(0)"></path><path d="M4,18H2V4A2,2,0,0,1,4,2H18V4H4Z" transform="translate(0)"></path><rect fill="none" width="32" height="32"></rect></svg> <div class="absolute pointer-events-none transition-opacity bg-black text-white py-1 px-2 leading-tight rounded font-normal shadow left-1/2 top-full transform -translate-x-1/2 translate-y-2 opacity-0"><div class="absolute bottom-full left-1/2 transform -translate-x-1/2 w-0 h-0 border-black border-4 border-t-0" style="border-left-color: transparent; border-right-color: transparent; "></div> Copied</div></button></div> <pre class=""><!-- HTML_TAG_START --><span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">import</span> torch
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoProcessor
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">from</span> transformers <span class="hljs-keyword">import</span> AutoModelForDocumentQuestionAnswering
<span class="hljs-meta">&gt;&gt;&gt; </span>processor = AutoProcessor.from_pretrained(<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span>model = AutoModelForDocumentQuestionAnswering.from_pretrained(<span class="hljs-string">&quot;MariaK/layoutlmv2-base-uncased_finetuned_docvqa&quot;</span>)
<span class="hljs-meta">&gt;&gt;&gt; </span><span class="hljs-keyword">with</span> torch.no_grad():
<span class="hljs-meta">... </span> encoding = processor(image.convert(<span class="hljs-string">&quot;RGB&quot;</span>), question, return_tensors=<span class="hljs-string">&quot;pt&quot;</span>)
<span class="hljs-meta">... </span> outputs = model(**encoding)
<span class="hljs-meta">... </span> start_logits = outputs.start_logits
<span class="hljs-meta">... </span> end_logits = outputs.end_logits
<span class="hljs-meta">... </span> predicted_start_idx = start_logits.argmax(-<span class="hljs-number">1</span>).item()
<span class="hljs-meta">... </span> predicted_end_idx = end_logits.argmax(-<span class="hljs-number">1</span>).item()
<span class="hljs-meta">&gt;&gt;&gt; </span>processor.tokenizer.decode(encoding.input_ids.squeeze()[predicted_start_idx : predicted_end_idx + <span class="hljs-number">1</span>])
<span class="hljs-string">&#x27;lee a. waller&#x27;</span><!-- HTML_TAG_END --></pre></div> <a class="!text-gray-400 !no-underline text-sm flex items-center not-prose mt-4" href="https://github.com/huggingface/transformers/blob/main/docs/source/ko/tasks/document_question_answering.md" target="_blank"><span data-svelte-h="svelte-1kd6by1">&lt;</span> <span data-svelte-h="svelte-x0xyl0">&gt;</span> <span data-svelte-h="svelte-1dajgef"><span class="underline ml-1.5">Update</span> on GitHub</span></a> <p></p>
<script>
{
__sveltekit_1hrx8 = {
assets: "/docs/transformers/main/ko",
base: "/docs/transformers/main/ko",
env: {}
};
const element = document.currentScript.parentElement;
const data = [null,null];
Promise.all([
import("/docs/transformers/main/ko/_app/immutable/entry/start.9aa88961.js"),
import("/docs/transformers/main/ko/_app/immutable/entry/app.84fb67c3.js")
]).then(([kit, app]) => {
kit.start(app, element, {
node_ids: [0, 63],
data,
form: null,
error: null
});
});
}
</script>

Xet Storage Details

Size:
108 kB
·
Xet hash:
b4a4a6bd1b15a7981679bb33e5481e814e3e5df6b942f2ae8342e113f3ab0eab

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.