davanstrien's picture
davanstrien HF Staff
Update book from local render
2f7da2f verified
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>
<meta charset="utf-8">
<meta name="generator" content="quarto-1.8.25">
<meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">
<title>4&nbsp; Practical Application: Advisor Index Card Extraction – AI Design Patterns for GLAM</title>
<style>
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
div.columns{display: flex; gap: min(4vw, 1.5em);}
div.column{flex: auto; overflow-x: auto;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
ul.task-list li input[type="checkbox"] {
width: 0.8em;
margin: 0 0.8em 0.2em -1em; /* quarto-specific, see https://github.com/quarto-dev/quarto-cli/issues/4556 */
vertical-align: middle;
}
/* CSS for syntax highlighting */
html { -webkit-text-size-adjust: 100%; }
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
}
pre.numberSource { margin-left: 3em; padding-left: 4px; }
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
</style>
<script src="https://cdn.jsdelivr.net/npm/jquery@3.5.1/dist/jquery.min.js" integrity="sha384-ZvpUoO/+PpLXR1lu4jmpXWu80pZlYUAfxl5NsBMWOEPSjUn/6Z/hRTt8+pR6L4N2" crossorigin="anonymous"></script><script src="../../site_libs/quarto-nav/quarto-nav.js"></script>
<script src="../../site_libs/quarto-nav/headroom.min.js"></script>
<script src="../../site_libs/clipboard/clipboard.min.js"></script>
<script src="../../site_libs/quarto-search/autocomplete.umd.js"></script>
<script src="../../site_libs/quarto-search/fuse.min.js"></script>
<script src="../../site_libs/quarto-search/quarto-search.js"></script>
<meta name="quarto:offset" content="../../">
<link href="../../patterns/structured-generation/vlm-structured-generation.html" rel="prev">
<script src="../../site_libs/quarto-html/quarto.js" type="module"></script>
<script src="../../site_libs/quarto-html/tabsets/tabsets.js" type="module"></script>
<script src="../../site_libs/quarto-html/axe/axe-check.js" type="module"></script>
<script src="../../site_libs/quarto-html/popper.min.js"></script>
<script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
<script src="../../site_libs/quarto-html/anchor.min.js"></script>
<link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
<link href="../../site_libs/quarto-html/quarto-syntax-highlighting-7b89279ff1a6dce999919e0e67d4d9ec.css" rel="stylesheet" id="quarto-text-highlighting-styles">
<script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
<link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
<link href="../../site_libs/bootstrap/bootstrap-27c261d06b905028a18691de25d09dde.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
<script id="quarto-search-options" type="application/json">{
"location": "sidebar",
"copy-button": false,
"collapse-after": 3,
"panel-placement": "start",
"type": "textbox",
"limit": 50,
"keyboard-shortcut": [
"f",
"/",
"s"
],
"show-item-context": false,
"language": {
"search-no-results-text": "No results",
"search-matching-documents-text": "matching documents",
"search-copy-link-title": "Copy link to search",
"search-hide-matches-text": "Hide additional matches",
"search-more-match-text": "more match in this document",
"search-more-matches-text": "more matches in this document",
"search-clear-button-title": "Clear",
"search-text-placeholder": "",
"search-detached-cancel-button-title": "Cancel",
"search-submit-button-title": "Submit",
"search-label": "Search"
}
}</script>
<script src="https://cdn.jsdelivr.net/npm/requirejs@2.3.6/require.min.js" integrity="sha384-c9c+LnTbwQ3aujuU7ULEPVvgLs+Fn6fJUvIGTsuu1ZcCf11fiEubah0ttpca4ntM sha384-6V1/AdqZRWk1KAlWbKBlGhN7VG4iE/yAZcO6NZPMF8od0vukrvr0tg4qY6NSrItx" crossorigin="anonymous"></script>
<script type="application/javascript">define('jquery', [],function() {return window.jQuery;})</script>
<script src="https://cdn.jsdelivr.net/npm/@jupyter-widgets/html-manager@*/dist/embed-amd.js" crossorigin="anonymous"></script>
</head>
<body class="nav-sidebar floating quarto-light">
<div id="quarto-search-results"></div>
<header id="quarto-header" class="headroom fixed-top">
<nav class="quarto-secondary-nav">
<div class="container-fluid d-flex">
<button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
<i class="bi bi-layout-text-sidebar-reverse"></i>
</button>
<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/advisor-index-cards.html"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></a></li></ol></nav>
<a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
</a>
<button type="button" class="btn quarto-search-button" aria-label="Search" onclick="window.quartoOpenSearch();">
<i class="bi bi-search"></i>
</button>
</div>
</nav>
</header>
<!-- content -->
<div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
<!-- sidebar -->
<nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
<div class="pt-lg-2 mt-2 text-left sidebar-header sidebar-header-stacked">
<a href="../../index.html" class="sidebar-logo-link">
</a>
<div class="sidebar-title mb-0 py-0">
<a href="../../">AI Design Patterns for GLAM</a>
</div>
</div>
<div class="mt-2 flex-shrink-0 align-items-center">
<div class="sidebar-search">
<div id="quarto-search" class="" title="Search"></div>
</div>
</div>
<div class="sidebar-menu-container">
<ul class="list-unstyled mt-1">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../index.html" class="sidebar-item-text sidebar-link">
<span class="menu-text">Welcome</span></a>
</div>
</li>
<li class="sidebar-item sidebar-item-section">
<span class="sidebar-item-text sidebar-link text-start">
<span class="menu-text">Beyond Chat Interfaces to Collections?</span></span>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true">
<span class="menu-text">Design Patterns</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-2" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-2" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../patterns/what-is-an-ai-pattern.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">1</span>&nbsp; <span class="chapter-title">What is an AI Pattern?</span></span></a>
</div>
</li>
</ul>
</li>
<li class="sidebar-item sidebar-item-section">
<div class="sidebar-item-container">
<a class="sidebar-item-text sidebar-link text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true">
<span class="menu-text">Structured Information Extraction</span></a>
<a class="sidebar-item-toggle text-start" data-bs-toggle="collapse" data-bs-target="#quarto-sidebar-section-3" role="navigation" aria-expanded="true" aria-label="Toggle section">
<i class="bi bi-chevron-right ms-2"></i>
</a>
</div>
<ul id="quarto-sidebar-section-3" class="collapse list-unstyled sidebar-section depth1 show">
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../patterns/structured-generation/intro.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">2</span>&nbsp; <span class="chapter-title">Example Pattern: Structured Document Processing</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../patterns/structured-generation/vlm-structured-generation.html" class="sidebar-item-text sidebar-link">
<span class="menu-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Structured Information Extraction with Vision Language Models</span></span></a>
</div>
</li>
<li class="sidebar-item">
<div class="sidebar-item-container">
<a href="../../patterns/structured-generation/advisor-index-cards.html" class="sidebar-item-text sidebar-link active">
<span class="menu-text"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></span></a>
</div>
</li>
</ul>
</li>
</ul>
</div>
</nav>
<div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
<!-- margin-sidebar -->
<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
<nav id="TOC" role="doc-toc" class="toc-active">
<h2 id="toc-title">Table of contents</h2>
<ul>
<li><a href="#introduction" id="toc-introduction" class="nav-link active" data-scroll-target="#introduction"><span class="header-section-number">4.1</span> Introduction</a></li>
<li><a href="#the-task-advisor-index-cards" id="toc-the-task-advisor-index-cards" class="nav-link" data-scroll-target="#the-task-advisor-index-cards"><span class="header-section-number">4.2</span> The Task: Advisor Index Cards</a>
<ul class="collapse">
<li><a href="#example-cards" id="toc-example-cards" class="nav-link" data-scroll-target="#example-cards"><span class="header-section-number">4.2.1</span> Example Cards</a></li>
</ul></li>
<li><a href="#schema-design" id="toc-schema-design" class="nav-link" data-scroll-target="#schema-design"><span class="header-section-number">4.3</span> Schema Design</a></li>
<li><a href="#setup" id="toc-setup" class="nav-link" data-scroll-target="#setup"><span class="header-section-number">4.4</span> Setup</a></li>
<li><a href="#extraction-examples" id="toc-extraction-examples" class="nav-link" data-scroll-target="#extraction-examples"><span class="header-section-number">4.5</span> Extraction Examples</a>
<ul class="collapse">
<li><a href="#comparing-extraction-to-ground-truth" id="toc-comparing-extraction-to-ground-truth" class="nav-link" data-scroll-target="#comparing-extraction-to-ground-truth"><span class="header-section-number">4.5.1</span> Comparing Extraction to Ground Truth</a></li>
</ul></li>
<li><a href="#evaluation-strategies" id="toc-evaluation-strategies" class="nav-link" data-scroll-target="#evaluation-strategies"><span class="header-section-number">4.6</span> Evaluation Strategies</a>
<ul class="collapse">
<li><a href="#looking-at-lots-of-samples" id="toc-looking-at-lots-of-samples" class="nav-link" data-scroll-target="#looking-at-lots-of-samples"><span class="header-section-number">4.6.1</span> Looking at lots of samples</a></li>
<li><a href="#manual-ground-truth-evaluation" id="toc-manual-ground-truth-evaluation" class="nav-link" data-scroll-target="#manual-ground-truth-evaluation"><span class="header-section-number">4.6.2</span> 1. Manual Ground Truth Evaluation</a></li>
<li><a href="#cross-model-evaluation-model-as-judge" id="toc-cross-model-evaluation-model-as-judge" class="nav-link" data-scroll-target="#cross-model-evaluation-model-as-judge"><span class="header-section-number">4.6.3</span> 2. Cross-Model Evaluation (Model-as-Judge)</a></li>
<li><a href="#internal-consistency-checks" id="toc-internal-consistency-checks" class="nav-link" data-scroll-target="#internal-consistency-checks"><span class="header-section-number">4.6.4</span> 3. Internal Consistency Checks</a></li>
<li><a href="#confidence-scoring" id="toc-confidence-scoring" class="nav-link" data-scroll-target="#confidence-scoring"><span class="header-section-number">4.6.5</span> 4. Confidence Scoring</a></li>
<li><a href="#combining-evaluation-approaches" id="toc-combining-evaluation-approaches" class="nav-link" data-scroll-target="#combining-evaluation-approaches"><span class="header-section-number">4.6.6</span> Combining Evaluation Approaches</a></li>
</ul></li>
<li><a href="#batch-processing" id="toc-batch-processing" class="nav-link" data-scroll-target="#batch-processing"><span class="header-section-number">4.7</span> Batch Processing</a>
<ul class="collapse">
<li><a href="#results-analysis" id="toc-results-analysis" class="nav-link" data-scroll-target="#results-analysis"><span class="header-section-number">4.7.1</span> Results Analysis</a></li>
</ul></li>
<li><a href="#edge-cases-and-failure-modes" id="toc-edge-cases-and-failure-modes" class="nav-link" data-scroll-target="#edge-cases-and-failure-modes"><span class="header-section-number">4.8</span> Edge Cases and Failure Modes</a></li>
<li><a href="#export-for-cataloging" id="toc-export-for-cataloging" class="nav-link" data-scroll-target="#export-for-cataloging"><span class="header-section-number">4.9</span> Export for Cataloging</a></li>
<li><a href="#next-steps" id="toc-next-steps" class="nav-link" data-scroll-target="#next-steps"><span class="header-section-number">4.10</span> Next Steps</a></li>
<li><a href="#key-takeaways" id="toc-key-takeaways" class="nav-link" data-scroll-target="#key-takeaways"><span class="header-section-number">4.11</span> Key Takeaways</a></li>
</ul>
</nav>
</div>
<!-- main -->
<main class="content" id="quarto-document-content">
<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/advisor-index-cards.html"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></a></li></ol></nav>
<div class="quarto-title">
<h1 class="title"><span class="chapter-number">4</span>&nbsp; <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></h1>
</div>
<div class="quarto-title-meta">
</div>
</header>
<section id="introduction" class="level2" data-number="4.1">
<h2 data-number="4.1" class="anchored" data-anchor-id="introduction"><span class="header-section-number">4.1</span> Introduction</h2>
<p>This chapter demonstrates a practical application of VLM-based structured extraction on a real-world GLAM digitization project: extracting structured metadata from historical index cards from the National Library of Scotland’s Advocates Library collection.</p>
<p>Unlike the previous chapter, which focused on explaining VLM concepts and setup, this chapter assumes you’re familiar with the basics and focuses on:</p>
<ol type="1">
<li>Designing schemas for real catalog requirements</li>
<li>Running extractions at scale</li>
<li><strong>Evaluating extraction quality</strong> - different strategies for assessing accuracy</li>
<li>Handling edge cases and failures</li>
</ol>
</section>
<section id="the-task-advisor-index-cards" class="level2" data-number="4.2">
<h2 data-number="4.2" class="anchored" data-anchor-id="the-task-advisor-index-cards"><span class="header-section-number">4.2</span> The Task: Advisor Index Cards</h2>
<p>The National Library of Scotland has a collection of historical index cards documenting manuscripts and correspondence. Each card follows a fairly consistent format:</p>
<ul>
<li><strong>Surname</strong>: Family name</li>
<li><strong>Forenames</strong>: Given names</li>
<li><strong>Epithet</strong>: Role, title, or occupation</li>
<li><strong>MS no</strong>: Manuscript reference number</li>
<li><strong>Description</strong>: Document type and date</li>
<li><strong>Folios</strong>: Page references</li>
</ul>
<p>The goal is to extract this structured information to enable:</p>
<ul>
<li>Searchable digital catalog</li>
<li>Integration with library management systems</li>
<li>Research access to historical collections</li>
</ul>
<section id="example-cards" class="level3" data-number="4.2.1">
<h3 data-number="4.2.1" class="anchored" data-anchor-id="example-cards"><span class="header-section-number">4.2.1</span> Example Cards</h3>
<p>Let’s look at a few sample cards from the collection:</p>
<div id="cell-4" class="cell" data-execution_count="6">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb1"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> pathlib <span class="im">import</span> Path</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> matplotlib.pyplot <span class="im">as</span> plt</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a>images <span class="op">=</span> <span class="bu">list</span>(Path(<span class="st">"../../assets/vllm-structured-generation/indexes/"</span>).rglob(<span class="st">"*.JPG"</span>))</span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>images</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display" data-execution_count="6">
<pre><code>[PosixPath('../../assets/vllm-structured-generation/indexes/DSC00172.JPG'),
PosixPath('../../assets/vllm-structured-generation/indexes/DSC00173.JPG'),
PosixPath('../../assets/vllm-structured-generation/indexes/DSC00171.JPG'),
PosixPath('../../assets/vllm-structured-generation/indexes/DSC00170.JPG'),
PosixPath('../../assets/vllm-structured-generation/indexes/DSC00169.JPG'),
PosixPath('../../assets/vllm-structured-generation/indexes/DSC00168.JPG')]</code></pre>
</div>
</div>
<div id="cell-5" class="cell" data-execution_count="7">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb3"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="co"># display a grid of images using matplotlib (len of images)</span></span>
<span id="cb3-2"><a href="#cb3-2" aria-hidden="true" tabindex="-1"></a>number_of_images <span class="op">=</span> <span class="bu">len</span>(images)</span>
<span id="cb3-3"><a href="#cb3-3" aria-hidden="true" tabindex="-1"></a>cols <span class="op">=</span> <span class="dv">3</span></span>
<span id="cb3-4"><a href="#cb3-4" aria-hidden="true" tabindex="-1"></a>rows <span class="op">=</span> (number_of_images <span class="op">+</span> cols <span class="op">-</span> <span class="dv">1</span>) <span class="op">//</span> cols</span>
<span id="cb3-5"><a href="#cb3-5" aria-hidden="true" tabindex="-1"></a>fig, axs <span class="op">=</span> plt.subplots(rows, cols, figsize<span class="op">=</span>(<span class="dv">15</span>, <span class="dv">5</span> <span class="op">*</span> rows))</span>
<span id="cb3-6"><a href="#cb3-6" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i, img_path <span class="kw">in</span> <span class="bu">enumerate</span>(images):</span>
<span id="cb3-7"><a href="#cb3-7" aria-hidden="true" tabindex="-1"></a> img <span class="op">=</span> plt.imread(img_path)</span>
<span id="cb3-8"><a href="#cb3-8" aria-hidden="true" tabindex="-1"></a> ax <span class="op">=</span> axs[i <span class="op">//</span> cols, i <span class="op">%</span> cols] <span class="cf">if</span> rows <span class="op">&gt;</span> <span class="dv">1</span> <span class="cf">else</span> axs[i <span class="op">%</span> cols]</span>
<span id="cb3-9"><a href="#cb3-9" aria-hidden="true" tabindex="-1"></a> ax.imshow(img)</span>
<span id="cb3-10"><a href="#cb3-10" aria-hidden="true" tabindex="-1"></a> ax.axis(<span class="st">'off'</span>)</span>
<span id="cb3-11"><a href="#cb3-11" aria-hidden="true" tabindex="-1"></a> ax.set_title(img_path.stem)</span>
<span id="cb3-12"><a href="#cb3-12" aria-hidden="true" tabindex="-1"></a>plt.tight_layout()</span>
<span id="cb3-13"><a href="#cb3-13" aria-hidden="true" tabindex="-1"></a>plt.show()</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
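<p><img src="advisor-index-cards_files/figure-html/cell-3-output-1.png" class="img-fluid figure-img" alt="Grid of six photographed index cards from the collection, each titled with its image file name"></p>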
</figure>
</div>
</div>
</div>
</section>
</section>
<section id="schema-design" class="level2" data-number="4.3">
<h2 data-number="4.3" class="anchored" data-anchor-id="schema-design"><span class="header-section-number">4.3</span> Schema Design</h2>
<p>Working with the library curators, we designed a schema that matches their cataloging requirements. The schema is intentionally simple - complex schemas are harder for VLMs to extract reliably.</p>
<p>We can iterate on this schema later based on extraction quality, but it gives us a solid starting point.</p>
<div id="cell-7" class="cell" data-execution_count="8">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb4"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> pydantic <span class="im">import</span> BaseModel, Field</span>
<span id="cb4-2"><a href="#cb4-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> typing <span class="im">import</span> Optional</span>
<span id="cb4-3"><a href="#cb4-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb4-4"><a href="#cb4-4" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> IndexCardEntry(BaseModel):</span>
<span id="cb4-5"><a href="#cb4-5" aria-hidden="true" tabindex="-1"></a> <span class="co">"""Schema for index card extraction matching curator specification"""</span></span>
<span id="cb4-6"><a href="#cb4-6" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb4-7"><a href="#cb4-7" aria-hidden="true" tabindex="-1"></a> surname: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Family name as written on card"</span>)</span>
<span id="cb4-8"><a href="#cb4-8" aria-hidden="true" tabindex="-1"></a> forenames: Optional[<span class="bu">str</span>] <span class="op">=</span> Field(<span class="va">None</span>, description<span class="op">=</span><span class="st">"Given names"</span>)</span>
<span id="cb4-9"><a href="#cb4-9" aria-hidden="true" tabindex="-1"></a> epithet: Optional[<span class="bu">str</span>] <span class="op">=</span> Field(<span class="va">None</span>, description<span class="op">=</span><span class="st">"Title, occupation, or role"</span>)</span>
<span id="cb4-10"><a href="#cb4-10" aria-hidden="true" tabindex="-1"></a> ms_no: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Manuscript number"</span>)</span>
<span id="cb4-11"><a href="#cb4-11" aria-hidden="true" tabindex="-1"></a> description: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Document description with date"</span>)</span>
<span id="cb4-12"><a href="#cb4-12" aria-hidden="true" tabindex="-1"></a> folios: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Folio reference"</span>)</span>
<span id="cb4-13"><a href="#cb4-13" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb4-14"><a href="#cb4-14" aria-hidden="true" tabindex="-1"></a> failed_to_parse: <span class="bu">bool</span> <span class="op">=</span> Field(</span>
<span id="cb4-15"><a href="#cb4-15" aria-hidden="true" tabindex="-1"></a> <span class="va">False</span>,</span>
<span id="cb4-16"><a href="#cb4-16" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Set to True if the card cannot be reliably extracted (illegible, damaged, etc.)"</span></span>
<span id="cb4-17"><a href="#cb4-17" aria-hidden="true" tabindex="-1"></a> )</span>
<span id="cb4-18"><a href="#cb4-18" aria-hidden="true" tabindex="-1"></a> notes: Optional[<span class="bu">str</span>] <span class="op">=</span> Field(</span>
<span id="cb4-19"><a href="#cb4-19" aria-hidden="true" tabindex="-1"></a> <span class="va">None</span>, </span>
<span id="cb4-20"><a href="#cb4-20" aria-hidden="true" tabindex="-1"></a> description<span class="op">=</span><span class="st">"Optional notes about the card: handwritten annotations, ambiguities, "</span></span>
<span id="cb4-21"><a href="#cb4-21" aria-hidden="true" tabindex="-1"></a> <span class="st">"corrections, or reasons for failed parsing."</span></span>
<span id="cb4-22"><a href="#cb4-22" aria-hidden="true" tabindex="-1"></a> )</span>
<span id="cb4-23"><a href="#cb4-23" aria-hidden="true" tabindex="-1"></a></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<p>Let’s take a look at the schema definition we’ll use for extraction:</p>
<div id="cell-9" class="cell" data-execution_count="11">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb5"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Display the schema</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> rich <span class="im">import</span> <span class="bu">print</span></span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(IndexCardEntry.model_json_schema())</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace"><span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Schema for index card extraction matching curator specification'</span>,
<span style="color: #008000; text-decoration-color: #008000">'properties'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'surname'</span>: <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Family name as written on card'</span>, <span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Surname'</span>, <span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'forenames'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'anyOf'</span>: <span style="font-weight: bold">[{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>, <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'null'</span><span style="font-weight: bold">}]</span>,
<span style="color: #008000; text-decoration-color: #008000">'default'</span>: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span>,
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Given names'</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Forenames'</span>
<span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'epithet'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'anyOf'</span>: <span style="font-weight: bold">[{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>, <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'null'</span><span style="font-weight: bold">}]</span>,
<span style="color: #008000; text-decoration-color: #008000">'default'</span>: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span>,
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Title, occupation, or role'</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Epithet'</span>
<span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'ms_no'</span>: <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Manuscript number'</span>, <span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Ms No'</span>, <span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Document description with date'</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Description'</span>,
<span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span>
<span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'folios'</span>: <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Folio reference'</span>, <span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Folios'</span>, <span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'failed_to_parse'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'default'</span>: <span style="color: #ff0000; text-decoration-color: #ff0000; font-style: italic">False</span>,
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Set to True if the card cannot be reliably extracted (illegible, damaged, etc.)'</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Failed To Parse'</span>,
<span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'boolean'</span>
<span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'notes'</span>: <span style="font-weight: bold">{</span>
<span style="color: #008000; text-decoration-color: #008000">'anyOf'</span>: <span style="font-weight: bold">[{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'string'</span><span style="font-weight: bold">}</span>, <span style="font-weight: bold">{</span><span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'null'</span><span style="font-weight: bold">}]</span>,
<span style="color: #008000; text-decoration-color: #008000">'default'</span>: <span style="color: #800080; text-decoration-color: #800080; font-style: italic">None</span>,
<span style="color: #008000; text-decoration-color: #008000">'description'</span>: <span style="color: #008000; text-decoration-color: #008000">'Optional notes about the card: handwritten annotations, ambiguities, corrections, or </span>
<span style="color: #008000; text-decoration-color: #008000">reasons for failed parsing.'</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'Notes'</span>
<span style="font-weight: bold">}</span>
<span style="font-weight: bold">}</span>,
<span style="color: #008000; text-decoration-color: #008000">'required'</span>: <span style="font-weight: bold">[</span><span style="color: #008000; text-decoration-color: #008000">'surname'</span>, <span style="color: #008000; text-decoration-color: #008000">'ms_no'</span>, <span style="color: #008000; text-decoration-color: #008000">'description'</span>, <span style="color: #008000; text-decoration-color: #008000">'folios'</span><span style="font-weight: bold">]</span>,
<span style="color: #008000; text-decoration-color: #008000">'title'</span>: <span style="color: #008000; text-decoration-color: #008000">'IndexCardEntry'</span>,
<span style="color: #008000; text-decoration-color: #008000">'type'</span>: <span style="color: #008000; text-decoration-color: #008000">'object'</span>
<span style="font-weight: bold">}</span>
</pre>
</div>
</div>
</section>
<section id="setup" class="level2" data-number="4.4">
<h2 data-number="4.4" class="anchored" data-anchor-id="setup"><span class="header-section-number">4.4</span> Setup</h2>
<p>We’ll reuse the VLM setup from the previous chapter. If you haven’t already, make sure LM Studio is running with a VLM loaded.</p>
<div id="cell-11" class="cell" data-execution_count="12">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb6"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> openai <span class="im">import</span> OpenAI</span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="im">import</span> base64</span>
<span id="cb6-3"><a href="#cb6-3" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> io <span class="im">import</span> BytesIO</span>
<span id="cb6-4"><a href="#cb6-4" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> PIL <span class="im">import</span> Image <span class="im">as</span> PILImage</span>
<span id="cb6-5"><a href="#cb6-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-6"><a href="#cb6-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb6-7"><a href="#cb6-7" aria-hidden="true" tabindex="-1"></a>client <span class="op">=</span> OpenAI(</span>
<span id="cb6-8"><a href="#cb6-8" aria-hidden="true" tabindex="-1"></a> base_url<span class="op">=</span><span class="st">"http://localhost:1234/v1"</span>,</span>
<span id="cb6-9"><a href="#cb6-9" aria-hidden="true" tabindex="-1"></a> api_key<span class="op">=</span><span class="st">"lm-studio"</span></span>
<span id="cb6-10"><a href="#cb6-10" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<div id="cell-12" class="cell" data-execution_count="13">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb7"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>client.models.<span class="bu">list</span>() </span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display" data-execution_count="13">
<pre><code>SyncPage[Model](data=[Model(id='qwen3-vl-2b-instruct-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-vl-8b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-vl-4b', created=None, object='model', owned_by='organization_owner'), Model(id='text-embedding-nomic-embed-text-v1.5', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-thinking@4bit', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-thinking@3bit', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-4b-thinking-2507', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3-12b', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3-4b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen2-0.5b-instruct-fingreylit', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3n-e4b', created=None, object='model', owned_by='organization_owner'), Model(id='granite-vision-3.3-2b', created=None, object='model', owned_by='organization_owner'), Model(id='ibm/granite-4-h-tiny', created=None, object='model', owned_by='organization_owner'), Model(id='iconclass-vlm', created=None, object='model', owned_by='organization_owner'), Model(id='mlx-community/qwen2.5-vl-3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='lmstudio-community/qwen2.5-vl-3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='lfm2-vl-1.6b', created=None, object='model', owned_by='organization_owner'), Model(id='mimo-vl-7b-rl-2508@q4_k_s', created=None, object='model', owned_by='organization_owner'), Model(id='mimo-vl-7b-rl-2508@q8_0', created=None, object='model', 
owned_by='organization_owner'), Model(id='qwen3-30b-a3b-instruct-2507', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-4b-instruct-2507-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='openai/gpt-oss-20b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen2.5-vl-7b', created=None, object='model', owned_by='organization_owner'), Model(id='mistralai/mistral-small-3.2', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-30b-a3b-instruct-2507-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='liquid/lfm2-1.2b', created=None, object='model', owned_by='organization_owner'), Model(id='smollm3-3b-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='unsloth/smollm3-3b', created=None, object='model', owned_by='organization_owner'), Model(id='ggml-org/smollm3-3b', created=None, object='model', owned_by='organization_owner'), Model(id='mlx-community/smollm3-3b', created=None, object='model', owned_by='organization_owner')], object='list')</code></pre>
</div>
</div>
<div id="cell-13" class="cell" data-execution_count="16">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb9"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> typing <span class="im">import</span> Union</span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="kw">def</span> query_image_structured(image: Union[PILImage.Image, <span class="bu">str</span>], prompt: <span class="bu">str</span>, schema: BaseModel, model<span class="op">=</span><span class="st">'qwen/qwen3-vl-4b'</span>):</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="co">"""</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="co"> Query VLM with an image and get structured output based on a Pydantic schema.</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a><span class="co"> </span></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a><span class="co"> Args:</span></span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a><span class="co"> image: PIL Image or file path to the image</span></span>
<span id="cb9-8"><a href="#cb9-8" aria-hidden="true" tabindex="-1"></a><span class="co"> prompt: Text prompt describing what to extract</span></span>
<span id="cb9-9"><a href="#cb9-9" aria-hidden="true" tabindex="-1"></a><span class="co"> schema: Pydantic model class defining the expected output structure</span></span>
<span id="cb9-10"><a href="#cb9-10" aria-hidden="true" tabindex="-1"></a><span class="co"> model: Model ID to use for the query</span></span>
<span id="cb9-11"><a href="#cb9-11" aria-hidden="true" tabindex="-1"></a><span class="co"> </span></span>
<span id="cb9-12"><a href="#cb9-12" aria-hidden="true" tabindex="-1"></a><span class="co"> Returns:</span></span>
<span id="cb9-13"><a href="#cb9-13" aria-hidden="true" tabindex="-1"></a><span class="co"> Parsed Pydantic model instance with the extracted data</span></span>
<span id="cb9-14"><a href="#cb9-14" aria-hidden="true" tabindex="-1"></a><span class="co"> """</span></span>
<span id="cb9-15"><a href="#cb9-15" aria-hidden="true" tabindex="-1"></a> <span class="co"># Convert image to base64</span></span>
<span id="cb9-16"><a href="#cb9-16" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> <span class="bu">isinstance</span>(image, PILImage.Image):</span>
<span id="cb9-17"><a href="#cb9-17" aria-hidden="true" tabindex="-1"></a> buffered <span class="op">=</span> BytesIO()</span>
<span id="cb9-18"><a href="#cb9-18" aria-hidden="true" tabindex="-1"></a> image.save(buffered, <span class="bu">format</span><span class="op">=</span><span class="st">"JPEG"</span>)</span>
<span id="cb9-19"><a href="#cb9-19" aria-hidden="true" tabindex="-1"></a> image_base64 <span class="op">=</span> base64.b64encode(buffered.getvalue()).decode(<span class="st">'utf-8'</span>)</span>
<span id="cb9-20"><a href="#cb9-20" aria-hidden="true" tabindex="-1"></a> <span class="cf">else</span>:</span>
<span id="cb9-21"><a href="#cb9-21" aria-hidden="true" tabindex="-1"></a> <span class="cf">with</span> <span class="bu">open</span>(image, <span class="st">"rb"</span>) <span class="im">as</span> f:</span>
<span id="cb9-22"><a href="#cb9-22" aria-hidden="true" tabindex="-1"></a> image_base64 <span class="op">=</span> base64.b64encode(f.read()).decode(<span class="st">'utf-8'</span>)</span>
<span id="cb9-23"><a href="#cb9-23" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb9-24"><a href="#cb9-24" aria-hidden="true" tabindex="-1"></a> <span class="co"># Query with structured output</span></span>
<span id="cb9-25"><a href="#cb9-25" aria-hidden="true" tabindex="-1"></a> completion <span class="op">=</span> client.beta.chat.completions.parse(</span>
<span id="cb9-26"><a href="#cb9-26" aria-hidden="true" tabindex="-1"></a> model<span class="op">=</span>model,</span>
<span id="cb9-27"><a href="#cb9-27" aria-hidden="true" tabindex="-1"></a> messages<span class="op">=</span>[{</span>
<span id="cb9-28"><a href="#cb9-28" aria-hidden="true" tabindex="-1"></a> <span class="st">"role"</span>: <span class="st">"user"</span>,</span>
<span id="cb9-29"><a href="#cb9-29" aria-hidden="true" tabindex="-1"></a> <span class="st">"content"</span>: [</span>
<span id="cb9-30"><a href="#cb9-30" aria-hidden="true" tabindex="-1"></a> {<span class="st">"type"</span>: <span class="st">"text"</span>, <span class="st">"text"</span>: prompt},</span>
<span id="cb9-31"><a href="#cb9-31" aria-hidden="true" tabindex="-1"></a> {<span class="st">"type"</span>: <span class="st">"image_url"</span>, <span class="st">"image_url"</span>: {<span class="st">"url"</span>: <span class="ss">f"data:image/jpeg;base64,</span><span class="sc">{</span>image_base64<span class="sc">}</span><span class="ss">"</span>}}</span>
<span id="cb9-32"><a href="#cb9-32" aria-hidden="true" tabindex="-1"></a> ]</span>
<span id="cb9-33"><a href="#cb9-33" aria-hidden="true" tabindex="-1"></a> }],</span>
<span id="cb9-34"><a href="#cb9-34" aria-hidden="true" tabindex="-1"></a> response_format<span class="op">=</span>schema,</span>
<span id="cb9-35"><a href="#cb9-35" aria-hidden="true" tabindex="-1"></a> temperature<span class="op">=</span><span class="fl">0.3</span> <span class="co"># Lower temperature for more consistent extraction</span></span>
<span id="cb9-36"><a href="#cb9-36" aria-hidden="true" tabindex="-1"></a> )</span>
<span id="cb9-37"><a href="#cb9-37" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb9-38"><a href="#cb9-38" aria-hidden="true" tabindex="-1"></a> <span class="co"># Return the parsed structured data</span></span>
<span id="cb9-39"><a href="#cb9-39" aria-hidden="true" tabindex="-1"></a> <span class="cf">return</span> completion.choices[<span class="dv">0</span>].message.parsed</span>
<span id="cb9-40"><a href="#cb9-40" aria-hidden="true" tabindex="-1"></a></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="extraction-examples" class="level2" data-number="4.5">
<h2 data-number="4.5" class="anchored" data-anchor-id="extraction-examples"><span class="header-section-number">4.5</span> Extraction Examples</h2>
<p>Let’s run extraction on several sample cards to see how the model performs.</p>
<div id="cell-15" class="cell" data-execution_count="17">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb10"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>prompt <span class="op">=</span> <span class="st">"""Extract structured information from this historical library index card and return it as JSON.</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="st"> This is an index card from the National Library of Scotland's Advocate's Library collection. Each card documents a person and associated manuscript references.</span></span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-5"><a href="#cb10-5" aria-hidden="true" tabindex="-1"></a><span class="st"> Return a JSON object with these exact fields:</span></span>
<span id="cb10-6"><a href="#cb10-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-7"><a href="#cb10-7" aria-hidden="true" tabindex="-1"></a><span class="st"> {</span></span>
<span id="cb10-8"><a href="#cb10-8" aria-hidden="true" tabindex="-1"></a><span class="st"> "surname": "Family name exactly as typed (e.g., 'ABAD', 'ABARACA Y BOLEA')",</span></span>
<span id="cb10-9"><a href="#cb10-9" aria-hidden="true" tabindex="-1"></a><span class="st"> "forenames": "Given names (e.g., 'Joseph', 'Thomas') or null if not present",</span></span>
<span id="cb10-10"><a href="#cb10-10" aria-hidden="true" tabindex="-1"></a><span class="st"> "epithet": "Title, occupation, or role (e.g., 'Captain, Spanish Army') or null if not present",</span></span>
<span id="cb10-11"><a href="#cb10-11" aria-hidden="true" tabindex="-1"></a><span class="st"> "ms_no": "Manuscript number exactly as written (e.g., '5538', '5529')",</span></span>
<span id="cb10-12"><a href="#cb10-12" aria-hidden="true" tabindex="-1"></a><span class="st"> "description": "Document description with date (e.g., 'letter of (1783)', 'copy of petition of (ca. 1783)')",</span></span>
<span id="cb10-13"><a href="#cb10-13" aria-hidden="true" tabindex="-1"></a><span class="st"> "folios": "Folio reference exactly as written (e.g., 'f.11', 'f.169')",</span></span>
<span id="cb10-14"><a href="#cb10-14" aria-hidden="true" tabindex="-1"></a><span class="st"> "failed_to_parse": false (or true if card is illegible/severely damaged),</span></span>
<span id="cb10-15"><a href="#cb10-15" aria-hidden="true" tabindex="-1"></a><span class="st"> "notes": "Optional notes about handwritten corrections, ambiguities, or parsing issues"</span></span>
<span id="cb10-16"><a href="#cb10-16" aria-hidden="true" tabindex="-1"></a><span class="st"> }</span></span>
<span id="cb10-17"><a href="#cb10-17" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-18"><a href="#cb10-18" aria-hidden="true" tabindex="-1"></a><span class="st"> Guidelines:</span></span>
<span id="cb10-19"><a href="#cb10-19" aria-hidden="true" tabindex="-1"></a><span class="st"> - Extract text exactly as it appears - do not correct spelling or expand abbreviations</span></span>
<span id="cb10-20"><a href="#cb10-20" aria-hidden="true" tabindex="-1"></a><span class="st"> - Preserve original punctuation and formatting</span></span>
<span id="cb10-21"><a href="#cb10-21" aria-hidden="true" tabindex="-1"></a><span class="st"> - If a field is unclear but you can make a reasonable inference, extract it and note the ambiguity in "notes"</span></span>
<span id="cb10-22"><a href="#cb10-22" aria-hidden="true" tabindex="-1"></a><span class="st"> - Only set "failed_to_parse" to true if you genuinely cannot extract the required fields</span></span>
<span id="cb10-23"><a href="#cb10-23" aria-hidden="true" tabindex="-1"></a><span class="st"> - Use null for optional fields (forenames, epithet, notes) if they are not present or marked with a line"""</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<div id="cell-16" class="cell" data-execution_count="18">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb11"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb11-1"><a href="#cb11-1" aria-hidden="true" tabindex="-1"></a>image <span class="op">=</span> PILImage.<span class="bu">open</span>(images[<span class="dv">0</span>])</span>
<span id="cb11-2"><a href="#cb11-2" aria-hidden="true" tabindex="-1"></a>image </span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display" data-execution_count="18">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-10-output-1.png" class="img-fluid figure-img" alt="The first sample index card"></p>
</figure>
</div>
</div>
</div>
<div id="cell-17" class="cell" data-execution_count="19">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb12"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> rich <span class="im">import</span> <span class="bu">print</span></span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a>result <span class="op">=</span> query_image_structured(image, prompt, IndexCardEntry, model<span class="op">=</span><span class="st">'qwen/qwen3-vl-4b'</span>)</span>
<span id="cb12-3"><a href="#cb12-3" aria-hidden="true" tabindex="-1"></a> </span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<div id="cell-18" class="cell" data-execution_count="20">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb13"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="bu">print</span>(result)</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<pre style="white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace"><span style="color: #800080; text-decoration-color: #800080; font-weight: bold">IndexCardEntry</span><span style="font-weight: bold">(</span>
<span style="color: #808000; text-decoration-color: #808000">surname</span>=<span style="color: #008000; text-decoration-color: #008000">'ABBAATE'</span>,
<span style="color: #808000; text-decoration-color: #808000">forenames</span>=<span style="color: #008000; text-decoration-color: #008000">'Itala'</span>,
<span style="color: #808000; text-decoration-color: #808000">epithet</span>=<span style="color: #008000; text-decoration-color: #008000">'Daughter of the Physician'</span>,
<span style="color: #808000; text-decoration-color: #808000">ms_no</span>=<span style="color: #008000; text-decoration-color: #008000">'2633'</span>,
<span style="color: #808000; text-decoration-color: #808000">description</span>=<span style="color: #008000; text-decoration-color: #008000">'letter of (1878)'</span>,
<span style="color: #808000; text-decoration-color: #808000">folios</span>=<span style="color: #008000; text-decoration-color: #008000">'f. 38'</span>,
<span style="color: #808000; text-decoration-color: #808000">failed_to_parse</span>=<span style="color: #ff0000; text-decoration-color: #ff0000; font-style: italic">False</span>,
<span style="color: #808000; text-decoration-color: #808000">notes</span>=<span style="color: #008000; text-decoration-color: #008000">"Handwritten corrections and annotations present: 'Cairo' (instead of 'ABBAATE'), 'Cairo' (instead of </span>
<span style="color: #008000; text-decoration-color: #008000">'ABBAATE'), 'Physician' (instead of 'Physician'), '2633' (instead of '2633'), 'f. 38' (instead of 'f. 38'). Also, </span>
<span style="color: #008000; text-decoration-color: #008000">'Cairo' appears to be a scribbled correction or miswriting of 'ABBAATE'."</span>
<span style="font-weight: bold">)</span>
</pre>
</div>
</div>
<section id="comparing-extraction-to-ground-truth" class="level3" data-number="4.5.1">
<h3 data-number="4.5.1" class="anchored" data-anchor-id="comparing-extraction-to-ground-truth"><span class="header-section-number">4.5.1</span> Comparing Extraction to Ground Truth</h3>
<p>To compare extractions to the actual card content, we first run the extraction over every sample card and collect the results:</p>
<div id="cell-20" class="cell" data-execution_count="21">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb14"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> tqdm.auto <span class="im">import</span> tqdm</span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>results <span class="op">=</span> []</span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> img_path <span class="kw">in</span> tqdm(images):</span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a> image <span class="op">=</span> PILImage.<span class="bu">open</span>(img_path)</span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a> result <span class="op">=</span> query_image_structured(image, prompt, IndexCardEntry, model<span class="op">=</span><span class="st">'qwen/qwen3-vl-4b'</span>)</span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a> results.append((img_path.stem, result))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<script type="application/vnd.jupyter.widget-view+json">
{"model_id":"76709e59f6224a43852846be5e249d09","version_major":2,"version_minor":0,"quarto_mimetype":"application/vnd.jupyter.widget-view+json"}
</script>
</div>
</div>
</section>
</section>
<section id="evaluation-strategies" class="level2" data-number="4.6">
<h2 data-number="4.6" class="anchored" data-anchor-id="evaluation-strategies"><span class="header-section-number">4.6</span> Evaluation Strategies</h2>
<p>How do we know if the extraction is working well? There are several approaches to evaluation, each with different tradeoffs.</p>
<section id="looking-at-lots-of-samples" class="level3" data-number="4.6.1">
<h3 data-number="4.6.1" class="anchored" data-anchor-id="looking-at-lots-of-samples"><span class="header-section-number">4.6.1</span> Looking at lots of samples</h3>
<p>It sounds simple, but looking at a large number of random samples can give a good sense of overall quality. You can spot common errors, get a feel for how reliable the extraction is, and quickly build intuition about what might be going wrong and where to focus improvement efforts. Realistically, you will usually spend some time iterating on the prompt and schema at this stage. Looking at more than one example is important to avoid overfitting to a single case, but you don’t immediately need to look at hundreds of examples or set up complex metrics or evaluations; that can come later.</p>
<div id="cell-23" class="cell" data-execution_count="22">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb15"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i, (img_stem, result) <span class="kw">in</span> <span class="bu">enumerate</span>(results):</span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a> fig, (ax_img, ax_text) <span class="op">=</span> plt.subplots(<span class="dv">1</span>, <span class="dv">2</span>, figsize<span class="op">=</span>(<span class="dv">16</span>, <span class="dv">6</span>), </span>
<span id="cb15-3"><a href="#cb15-3" aria-hidden="true" tabindex="-1"></a> gridspec_kw<span class="op">=</span>{<span class="st">'width_ratios'</span>: [<span class="dv">1</span>, <span class="dv">1</span>]})</span>
<span id="cb15-4"><a href="#cb15-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-5"><a href="#cb15-5" aria-hidden="true" tabindex="-1"></a> <span class="co"># Left: Display image</span></span>
<span id="cb15-6"><a href="#cb15-6" aria-hidden="true" tabindex="-1"></a> img <span class="op">=</span> plt.imread(images[i])</span>
<span id="cb15-7"><a href="#cb15-7" aria-hidden="true" tabindex="-1"></a> ax_img.imshow(img)</span>
<span id="cb15-8"><a href="#cb15-8" aria-hidden="true" tabindex="-1"></a> ax_img.axis(<span class="st">'off'</span>)</span>
<span id="cb15-9"><a href="#cb15-9" aria-hidden="true" tabindex="-1"></a> ax_img.set_title(<span class="ss">f"Card </span><span class="sc">{</span>i<span class="op">+</span><span class="dv">1</span><span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>img_stem<span class="sc">}</span><span class="ss">"</span>, fontsize<span class="op">=</span><span class="dv">14</span>, fontweight<span class="op">=</span><span class="st">'bold'</span>)</span>
<span id="cb15-10"><a href="#cb15-10" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-11"><a href="#cb15-11" aria-hidden="true" tabindex="-1"></a> <span class="co"># Right: Display extracted data as formatted text</span></span>
<span id="cb15-12"><a href="#cb15-12" aria-hidden="true" tabindex="-1"></a> ax_text.axis(<span class="st">'off'</span>)</span>
<span id="cb15-13"><a href="#cb15-13" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-14"><a href="#cb15-14" aria-hidden="true" tabindex="-1"></a> <span class="co"># Format the extracted data nicely</span></span>
<span id="cb15-15"><a href="#cb15-15" aria-hidden="true" tabindex="-1"></a> text_lines <span class="op">=</span> [</span>
<span id="cb15-16"><a href="#cb15-16" aria-hidden="true" tabindex="-1"></a> <span class="st">"Extracted Data:"</span>,</span>
<span id="cb15-17"><a href="#cb15-17" aria-hidden="true" tabindex="-1"></a> <span class="st">""</span>,</span>
<span id="cb15-18"><a href="#cb15-18" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Surname: </span><span class="sc">{</span>result<span class="sc">.</span>surname<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-19"><a href="#cb15-19" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Forenames: </span><span class="sc">{</span>result<span class="sc">.</span>forenames <span class="kw">or</span> <span class="st">'N/A'</span><span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-20"><a href="#cb15-20" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Epithet: </span><span class="sc">{</span>result<span class="sc">.</span>epithet <span class="kw">or</span> <span class="st">'N/A'</span><span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-21"><a href="#cb15-21" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"MS No: </span><span class="sc">{</span>result<span class="sc">.</span>ms_no<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-22"><a href="#cb15-22" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Description: </span><span class="sc">{</span>result<span class="sc">.</span>description<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-23"><a href="#cb15-23" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Folios: </span><span class="sc">{</span>result<span class="sc">.</span>folios<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-24"><a href="#cb15-24" aria-hidden="true" tabindex="-1"></a> <span class="st">""</span>,</span>
<span id="cb15-25"><a href="#cb15-25" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Failed to Parse: </span><span class="sc">{</span>result<span class="sc">.</span>failed_to_parse<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb15-26"><a href="#cb15-26" aria-hidden="true" tabindex="-1"></a> ]</span>
<span id="cb15-27"><a href="#cb15-27" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-28"><a href="#cb15-28" aria-hidden="true" tabindex="-1"></a> <span class="co"># Add notes if present</span></span>
<span id="cb15-29"><a href="#cb15-29" aria-hidden="true" tabindex="-1"></a> <span class="cf">if</span> result.notes:</span>
<span id="cb15-30"><a href="#cb15-30" aria-hidden="true" tabindex="-1"></a> text_lines.extend((<span class="st">""</span>, <span class="st">"Notes:"</span>))</span>
<span id="cb15-31"><a href="#cb15-31" aria-hidden="true" tabindex="-1"></a> <span class="co"># Wrap long notes</span></span>
<span id="cb15-32"><a href="#cb15-32" aria-hidden="true" tabindex="-1"></a> <span class="im">import</span> textwrap</span>
<span id="cb15-33"><a href="#cb15-33" aria-hidden="true" tabindex="-1"></a> wrapped_notes <span class="op">=</span> textwrap.fill(result.notes, width<span class="op">=</span><span class="dv">60</span>)</span>
<span id="cb15-34"><a href="#cb15-34" aria-hidden="true" tabindex="-1"></a> text_lines.append(wrapped_notes)</span>
<span id="cb15-35"><a href="#cb15-35" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-36"><a href="#cb15-36" aria-hidden="true" tabindex="-1"></a> <span class="co"># Join and display</span></span>
<span id="cb15-37"><a href="#cb15-37" aria-hidden="true" tabindex="-1"></a> formatted_text <span class="op">=</span> <span class="st">"</span><span class="ch">\n</span><span class="st">"</span>.join(text_lines)</span>
<span id="cb15-38"><a href="#cb15-38" aria-hidden="true" tabindex="-1"></a> ax_text.text(<span class="fl">0.05</span>, <span class="fl">0.95</span>, formatted_text, </span>
<span id="cb15-39"><a href="#cb15-39" aria-hidden="true" tabindex="-1"></a> transform<span class="op">=</span>ax_text.transAxes,</span>
<span id="cb15-40"><a href="#cb15-40" aria-hidden="true" tabindex="-1"></a> fontsize<span class="op">=</span><span class="dv">11</span>,</span>
<span id="cb15-41"><a href="#cb15-41" aria-hidden="true" tabindex="-1"></a> verticalalignment<span class="op">=</span><span class="st">'top'</span>,</span>
<span id="cb15-42"><a href="#cb15-42" aria-hidden="true" tabindex="-1"></a> fontfamily<span class="op">=</span><span class="st">'monospace'</span>,</span>
<span id="cb15-43"><a href="#cb15-43" aria-hidden="true" tabindex="-1"></a> bbox<span class="op">=</span><span class="bu">dict</span>(boxstyle<span class="op">=</span><span class="st">'round'</span>, facecolor<span class="op">=</span><span class="st">'wheat'</span>, alpha<span class="op">=</span><span class="fl">0.3</span>))</span>
<span id="cb15-44"><a href="#cb15-44" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb15-45"><a href="#cb15-45" aria-hidden="true" tabindex="-1"></a> plt.tight_layout()</span>
<span id="cb15-46"><a href="#cb15-46" aria-hidden="true" tabindex="-1"></a> plt.show()</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-1.png" class="img-fluid figure-img" alt="Card 1: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-2.png" class="img-fluid figure-img" alt="Card 2: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-3.png" class="img-fluid figure-img" alt="Card 3: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-4.png" class="img-fluid figure-img" alt="Card 4: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-5.png" class="img-fluid figure-img" alt="Card 5: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-14-output-6.png" class="img-fluid figure-img" alt="Card 6: index card image beside its extracted fields, including the failed-to-parse flag"></p>
</figure>
</div>
</div>
</div>
<section id="what-we-learned-from-these-samples" class="level4" data-number="4.6.1.1">
<h4 data-number="4.6.1.1" class="anchored" data-anchor-id="what-we-learned-from-these-samples"><span class="header-section-number">4.6.1.1</span> What we learned from these samples</h4>
<ul>
<li>It seems that in these examples the <code>notes</code> field isn’t really adding much value and potentially it just adds noise.</li>
<li>While the <code>failed_to_parse</code> flag sounds useful, we may want to rely on other approaches to identify failures since the model may not always set this flag correctly (and in this case we probably have some other ways to identify failures like looking for missing critical fields).</li>
<li>Overall, we should prioritize extracting the most relevant information and avoid including fields that do not contribute to the understanding of the index card content. The simpler the schema, the less we have to check and the fewer tokens the model has to generate. This matters little when testing with small batches, but it can make a significant difference when scaling to thousands of cards.</li>
</ul>
<div id="cell-25" class="cell" data-execution_count="24">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb16"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> pydantic <span class="im">import</span> BaseModel, Field</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a><span class="im">from</span> typing <span class="im">import</span> Optional</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a><span class="kw">class</span> IndexCardEntry(BaseModel):</span>
<span id="cb16-5"><a href="#cb16-5" aria-hidden="true" tabindex="-1"></a> <span class="co">"""Schema for index card extraction matching curator specification"""</span></span>
<span id="cb16-6"><a href="#cb16-6" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb16-7"><a href="#cb16-7" aria-hidden="true" tabindex="-1"></a> surname: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Family name as written on card"</span>)</span>
<span id="cb16-8"><a href="#cb16-8" aria-hidden="true" tabindex="-1"></a> forenames: Optional[<span class="bu">str</span>] <span class="op">=</span> Field(<span class="va">None</span>, description<span class="op">=</span><span class="st">"Given names"</span>)</span>
<span id="cb16-9"><a href="#cb16-9" aria-hidden="true" tabindex="-1"></a> epithet: Optional[<span class="bu">str</span>] <span class="op">=</span> Field(<span class="va">None</span>, description<span class="op">=</span><span class="st">"Title, occupation, or role"</span>)</span>
<span id="cb16-10"><a href="#cb16-10" aria-hidden="true" tabindex="-1"></a> ms_no: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Manuscript number"</span>)</span>
<span id="cb16-11"><a href="#cb16-11" aria-hidden="true" tabindex="-1"></a> description: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Document description with date"</span>)</span>
<span id="cb16-12"><a href="#cb16-12" aria-hidden="true" tabindex="-1"></a> folios: <span class="bu">str</span> <span class="op">=</span> Field(..., description<span class="op">=</span><span class="st">"Folio reference"</span>)</span>
<span id="cb16-13"><a href="#cb16-13" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb16-14"><a href="#cb16-14" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-15"><a href="#cb16-15" aria-hidden="true" tabindex="-1"></a>prompt <span class="op">=</span> <span class="st">"""Extract structured information from this historical library index card and return it as JSON.</span></span>
<span id="cb16-16"><a href="#cb16-16" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-17"><a href="#cb16-17" aria-hidden="true" tabindex="-1"></a><span class="st"> This is an index card from the National Library of Scotland's Advocate's Library collection. Each card documents a person and associated manuscript references.</span></span>
<span id="cb16-18"><a href="#cb16-18" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-19"><a href="#cb16-19" aria-hidden="true" tabindex="-1"></a><span class="st"> Return a JSON object with these exact fields:</span></span>
<span id="cb16-20"><a href="#cb16-20" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-21"><a href="#cb16-21" aria-hidden="true" tabindex="-1"></a><span class="st"> {</span></span>
<span id="cb16-22"><a href="#cb16-22" aria-hidden="true" tabindex="-1"></a><span class="st"> "surname": "Family name exactly as typed (e.g., 'ABAD', 'ABARACA Y BOLEA')",</span></span>
<span id="cb16-23"><a href="#cb16-23" aria-hidden="true" tabindex="-1"></a><span class="st"> "forenames": "Given names (e.g., 'Joseph', 'Thomas') or null if not present",</span></span>
<span id="cb16-24"><a href="#cb16-24" aria-hidden="true" tabindex="-1"></a><span class="st"> "epithet": "Title, occupation, or role (e.g., 'Captain, Spanish Army') or null if not present",</span></span>
<span id="cb16-25"><a href="#cb16-25" aria-hidden="true" tabindex="-1"></a><span class="st"> "ms_no": "Manuscript number exactly as written (e.g., '5538', '5529')",</span></span>
<span id="cb16-26"><a href="#cb16-26" aria-hidden="true" tabindex="-1"></a><span class="st"> "description": "Document description with date (e.g., 'letter of (1783)', 'copy of petition of (ca. 1783)')",</span></span>
<span id="cb16-27"><a href="#cb16-27" aria-hidden="true" tabindex="-1"></a><span class="st"> "folios": "Folio reference exactly as written (e.g., 'f.11', 'f.169')",</span></span>
<span id="cb16-28"><a href="#cb16-28" aria-hidden="true" tabindex="-1"></a><span class="st"> }</span></span>
<span id="cb16-29"><a href="#cb16-29" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-30"><a href="#cb16-30" aria-hidden="true" tabindex="-1"></a><span class="st"> Guidelines:</span></span>
<span id="cb16-31"><a href="#cb16-31" aria-hidden="true" tabindex="-1"></a><span class="st"> - Extract text exactly as it appears - do not correct spelling or expand abbreviations</span></span>
<span id="cb16-32"><a href="#cb16-32" aria-hidden="true" tabindex="-1"></a><span class="st"> - Preserve original punctuation and formatting</span></span>
<span id="cb16-33"><a href="#cb16-33" aria-hidden="true" tabindex="-1"></a><span class="st"> - Use null for optional fields (forenames, epithet) if they are not present or marked with a line"""</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<div id="cell-26" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb17"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a>results <span class="op">=</span> []</span>
<span id="cb17-2"><a href="#cb17-2" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> img_path <span class="kw">in</span> tqdm(images):</span>
<span id="cb17-3"><a href="#cb17-3" aria-hidden="true" tabindex="-1"></a> image <span class="op">=</span> PILImage.<span class="bu">open</span>(img_path)</span>
<span id="cb17-4"><a href="#cb17-4" aria-hidden="true" tabindex="-1"></a> result <span class="op">=</span> query_image_structured(image, prompt, IndexCardEntry, model<span class="op">=</span><span class="st">'qwen/qwen3-vl-8b'</span>)</span>
<span id="cb17-5"><a href="#cb17-5" aria-hidden="true" tabindex="-1"></a> results.append((img_path.stem, result))</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<script type="application/vnd.jupyter.widget-view+json">
{"model_id":"e3f68a3e6bf1497680e319a48ce9852d","version_major":2,"version_minor":0,"quarto_mimetype":"application/vnd.jupyter.widget-view+json"}
</script>
</div>
</div>
<div id="cell-27" class="cell" data-execution_count="26">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb18"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a><span class="co"># Display images with extracted data side-by-side</span></span>
<span id="cb18-2"><a href="#cb18-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Two columns: left = image, right = extracted text</span></span>
<span id="cb18-3"><a href="#cb18-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb18-4"><a href="#cb18-4" aria-hidden="true" tabindex="-1"></a><span class="cf">for</span> i, (img_stem, result) <span class="kw">in</span> <span class="bu">enumerate</span>(results):</span>
<span id="cb18-5"><a href="#cb18-5" aria-hidden="true" tabindex="-1"></a> fig, (ax_img, ax_text) <span class="op">=</span> plt.subplots(<span class="dv">1</span>, <span class="dv">2</span>, figsize<span class="op">=</span>(<span class="dv">16</span>, <span class="dv">6</span>), </span>
<span id="cb18-6"><a href="#cb18-6" aria-hidden="true" tabindex="-1"></a> gridspec_kw<span class="op">=</span>{<span class="st">'width_ratios'</span>: [<span class="dv">1</span>, <span class="dv">1</span>]})</span>
<span id="cb18-7"><a href="#cb18-7" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb18-8"><a href="#cb18-8" aria-hidden="true" tabindex="-1"></a> <span class="co"># Left: Display image</span></span>
<span id="cb18-9"><a href="#cb18-9" aria-hidden="true" tabindex="-1"></a> img <span class="op">=</span> plt.imread(images[i])</span>
<span id="cb18-10"><a href="#cb18-10" aria-hidden="true" tabindex="-1"></a> ax_img.imshow(img)</span>
<span id="cb18-11"><a href="#cb18-11" aria-hidden="true" tabindex="-1"></a> ax_img.axis(<span class="st">'off'</span>)</span>
<span id="cb18-12"><a href="#cb18-12" aria-hidden="true" tabindex="-1"></a> ax_img.set_title(<span class="ss">f"Card </span><span class="sc">{</span>i<span class="op">+</span><span class="dv">1</span><span class="sc">}</span><span class="ss">: </span><span class="sc">{</span>img_stem<span class="sc">}</span><span class="ss">"</span>, fontsize<span class="op">=</span><span class="dv">14</span>, fontweight<span class="op">=</span><span class="st">'bold'</span>)</span>
<span id="cb18-13"><a href="#cb18-13" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb18-14"><a href="#cb18-14" aria-hidden="true" tabindex="-1"></a> <span class="co"># Right: Display extracted data as formatted text</span></span>
<span id="cb18-15"><a href="#cb18-15" aria-hidden="true" tabindex="-1"></a> ax_text.axis(<span class="st">'off'</span>)</span>
<span id="cb18-16"><a href="#cb18-16" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb18-17"><a href="#cb18-17" aria-hidden="true" tabindex="-1"></a> <span class="co"># Format the extracted data nicely</span></span>
<span id="cb18-18"><a href="#cb18-18" aria-hidden="true" tabindex="-1"></a> text_lines <span class="op">=</span> [</span>
<span id="cb18-19"><a href="#cb18-19" aria-hidden="true" tabindex="-1"></a> <span class="st">"Extracted Data:"</span>,</span>
<span id="cb18-20"><a href="#cb18-20" aria-hidden="true" tabindex="-1"></a> <span class="st">""</span>,</span>
<span id="cb18-21"><a href="#cb18-21" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Surname: </span><span class="sc">{</span>result<span class="sc">.</span>surname<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-22"><a href="#cb18-22" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Forenames: </span><span class="sc">{</span>result<span class="sc">.</span>forenames <span class="kw">or</span> <span class="st">'N/A'</span><span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-23"><a href="#cb18-23" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Epithet: </span><span class="sc">{</span>result<span class="sc">.</span>epithet <span class="kw">or</span> <span class="st">'N/A'</span><span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-24"><a href="#cb18-24" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"MS No: </span><span class="sc">{</span>result<span class="sc">.</span>ms_no<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-25"><a href="#cb18-25" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Description: </span><span class="sc">{</span>result<span class="sc">.</span>description<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-26"><a href="#cb18-26" aria-hidden="true" tabindex="-1"></a> <span class="ss">f"Folios: </span><span class="sc">{</span>result<span class="sc">.</span>folios<span class="sc">}</span><span class="ss">"</span>,</span>
<span id="cb18-27"><a href="#cb18-27" aria-hidden="true" tabindex="-1"></a> <span class="st">""</span>,</span>
<span id="cb18-28"><a href="#cb18-28" aria-hidden="true" tabindex="-1"></a> ]</span>
<span id="cb18-29"><a href="#cb18-29" aria-hidden="true" tabindex="-1"></a> <span class="co"># Join and display</span></span>
<span id="cb18-30"><a href="#cb18-30" aria-hidden="true" tabindex="-1"></a> formatted_text <span class="op">=</span> <span class="st">"</span><span class="ch">\n</span><span class="st">"</span>.join(text_lines)</span>
<span id="cb18-31"><a href="#cb18-31" aria-hidden="true" tabindex="-1"></a> ax_text.text(<span class="fl">0.05</span>, <span class="fl">0.95</span>, formatted_text, </span>
<span id="cb18-32"><a href="#cb18-32" aria-hidden="true" tabindex="-1"></a> transform<span class="op">=</span>ax_text.transAxes,</span>
<span id="cb18-33"><a href="#cb18-33" aria-hidden="true" tabindex="-1"></a> fontsize<span class="op">=</span><span class="dv">11</span>,</span>
<span id="cb18-34"><a href="#cb18-34" aria-hidden="true" tabindex="-1"></a> verticalalignment<span class="op">=</span><span class="st">'top'</span>,</span>
<span id="cb18-35"><a href="#cb18-35" aria-hidden="true" tabindex="-1"></a> fontfamily<span class="op">=</span><span class="st">'monospace'</span>,</span>
<span id="cb18-36"><a href="#cb18-36" aria-hidden="true" tabindex="-1"></a> bbox<span class="op">=</span><span class="bu">dict</span>(boxstyle<span class="op">=</span><span class="st">'round'</span>, facecolor<span class="op">=</span><span class="st">'wheat'</span>, alpha<span class="op">=</span><span class="fl">0.3</span>))</span>
<span id="cb18-37"><a href="#cb18-37" aria-hidden="true" tabindex="-1"></a> </span>
<span id="cb18-38"><a href="#cb18-38" aria-hidden="true" tabindex="-1"></a> plt.tight_layout()</span>
<span id="cb18-39"><a href="#cb18-39" aria-hidden="true" tabindex="-1"></a> plt.show()</span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-1.png" class="img-fluid figure-img" alt="Card 1: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-2.png" class="img-fluid figure-img" alt="Card 2: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-3.png" class="img-fluid figure-img" alt="Card 3: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-4.png" class="img-fluid figure-img" alt="Card 4: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-5.png" class="img-fluid figure-img" alt="Card 5: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
<div class="cell-output cell-output-display">
<div>
<figure class="figure">
<p><img src="advisor-index-cards_files/figure-html/cell-17-output-6.png" class="img-fluid figure-img" alt="Card 6: index card image beside the fields extracted with the simplified schema"></p>
</figure>
</div>
</div>
</div>
</section>
</section>
<section id="manual-ground-truth-evaluation" class="level3" data-number="4.6.2">
<h3 data-number="4.6.2" class="anchored" data-anchor-id="manual-ground-truth-evaluation"><span class="header-section-number">4.6.2</span> 1. Manual Ground Truth Evaluation</h3>
<p><strong>The Gold Standard</strong>: Manually annotate a sample of cards and compare.</p>
<p><strong>Pros</strong>:</p>
<ul>
<li>Most accurate measure of performance</li>
<li>Catches all types of errors</li>
<li>Builds training data for future improvements</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>Time consuming</li>
<li>Requires expert annotators</li>
<li>Limited sample size</li>
</ul>
<p><strong>Best for</strong>: Final validation, establishing baselines, understanding failure modes</p>
<div id="cell-29" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb19"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb19-1"><a href="#cb19-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Load manually annotated ground truth</span></span>
<span id="cb19-2"><a href="#cb19-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Compare predictions to ground truth</span></span>
<span id="cb19-3"><a href="#cb19-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Calculate field-level accuracy</span></span>
<span id="cb19-4"><a href="#cb19-4" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb19-5"><a href="#cb19-5" aria-hidden="true" tabindex="-1"></a><span class="co"># Example metrics:</span></span>
<span id="cb19-6"><a href="#cb19-6" aria-hidden="true" tabindex="-1"></a><span class="co"># - Exact match accuracy per field</span></span>
<span id="cb19-7"><a href="#cb19-7" aria-hidden="true" tabindex="-1"></a><span class="co"># - Character error rate</span></span>
<span id="cb19-8"><a href="#cb19-8" aria-hidden="true" tabindex="-1"></a><span class="co"># - Common error patterns</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="cross-model-evaluation-model-as-judge" class="level3" data-number="4.6.3">
<h3 data-number="4.6.3" class="anchored" data-anchor-id="cross-model-evaluation-model-as-judge"><span class="header-section-number">4.6.3</span> 2. Cross-Model Evaluation (Model-as-Judge)</h3>
<p><strong>The Pragmatic Approach</strong>: Use a stronger/different model to evaluate outputs.</p>
<p><strong>Pros</strong>:</p>
<ul>
<li>Much faster than manual annotation</li>
<li>Can evaluate full dataset</li>
<li>Good for catching obvious errors</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>Requires access to multiple models</li>
<li>May miss subtle errors</li>
<li>Judge model can be wrong too</li>
</ul>
<p><strong>Best for</strong>: Large-scale quality monitoring, automated testing, identifying problem areas for manual review</p>
<div id="cell-31" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb20"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb20-1"><a href="#cb20-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Implement model-as-judge evaluation</span></span>
<span id="cb20-2"><a href="#cb20-2" aria-hidden="true" tabindex="-1"></a><span class="co"># - Extract with Model A (e.g., local Qwen)</span></span>
<span id="cb20-3"><a href="#cb20-3" aria-hidden="true" tabindex="-1"></a><span class="co"># - Show image + extraction to Model B (e.g., Claude/GPT-4)</span></span>
<span id="cb20-4"><a href="#cb20-4" aria-hidden="true" tabindex="-1"></a><span class="co"># - Ask Model B to rate accuracy and identify errors</span></span>
<span id="cb20-5"><a href="#cb20-5" aria-hidden="true" tabindex="-1"></a><span class="co"># - Aggregate results</span></span>
<span id="cb20-6"><a href="#cb20-6" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb20-7"><a href="#cb20-7" aria-hidden="true" tabindex="-1"></a><span class="co"># Example judge prompt:</span></span>
<span id="cb20-8"><a href="#cb20-8" aria-hidden="true" tabindex="-1"></a><span class="co"># """</span></span>
<span id="cb20-9"><a href="#cb20-9" aria-hidden="true" tabindex="-1"></a><span class="co"># Compare this extracted data to the index card image:</span></span>
<span id="cb20-10"><a href="#cb20-10" aria-hidden="true" tabindex="-1"></a><span class="co"># [extraction]</span></span>
<span id="cb20-11"><a href="#cb20-11" aria-hidden="true" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb20-12"><a href="#cb20-12" aria-hidden="true" tabindex="-1"></a><span class="co"># For each field, rate accuracy:</span></span>
<span id="cb20-13"><a href="#cb20-13" aria-hidden="true" tabindex="-1"></a><span class="co"># - Correct: Field matches card exactly</span></span>
<span id="cb20-14"><a href="#cb20-14" aria-hidden="true" tabindex="-1"></a><span class="co"># - Minor error: Small typo or formatting difference</span></span>
<span id="cb20-15"><a href="#cb20-15" aria-hidden="true" tabindex="-1"></a><span class="co"># - Major error: Wrong information</span></span>
<span id="cb20-16"><a href="#cb20-16" aria-hidden="true" tabindex="-1"></a><span class="co"># - Missing: Field is on card but not extracted</span></span>
<span id="cb20-17"><a href="#cb20-17" aria-hidden="true" tabindex="-1"></a><span class="co"># """</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="internal-consistency-checks" class="level3" data-number="4.6.4">
<h3 data-number="4.6.4" class="anchored" data-anchor-id="internal-consistency-checks"><span class="header-section-number">4.6.4</span> 3. Internal Consistency Checks</h3>
<p><strong>The Automated Approach</strong>: Use business rules and patterns to identify suspicious outputs.</p>
<p><strong>Examples</strong>:</p>
<ul>
<li>Manuscript numbers should follow known patterns</li>
<li>Dates should be within expected ranges</li>
<li>Folio references have consistent formats</li>
<li>Certain fields should always be present</li>
</ul>
<p><strong>Pros</strong>:</p>
<ul>
<li>Completely automated</li>
<li>Fast – can run on full dataset</li>
<li>No additional model costs</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>Only catches specific error types</li>
<li>Requires domain knowledge to design rules</li>
<li>Can miss errors that follow valid patterns</li>
</ul>
<p><strong>Best for</strong>: Flagging outliers for review, automated quality gates, monitoring production systems</p>
<div id="cell-33" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb21"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb21-1"><a href="#cb21-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Implement consistency checks</span></span>
<span id="cb21-2"><a href="#cb21-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb21-3"><a href="#cb21-3" aria-hidden="true" tabindex="-1"></a><span class="co"># def validate_extraction(entry: IndexCardEntry) -&gt; list[str]:</span></span>
<span id="cb21-4"><a href="#cb21-4" aria-hidden="true" tabindex="-1"></a><span class="co"># """Run validation checks and return list of warnings."""</span></span>
<span id="cb21-5"><a href="#cb21-5" aria-hidden="true" tabindex="-1"></a><span class="co"># warnings = []</span></span>
<span id="cb21-6"><a href="#cb21-6" aria-hidden="true" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb21-7"><a href="#cb21-7" aria-hidden="true" tabindex="-1"></a><span class="co"># # Check MS number format</span></span>
<span id="cb21-8"><a href="#cb21-8" aria-hidden="true" tabindex="-1"></a><span class="co"># if not re.match(r'^\d+', entry.ms_no):</span></span>
<span id="cb21-9"><a href="#cb21-9" aria-hidden="true" tabindex="-1"></a><span class="co"># warnings.append(f"Unusual MS number format: {entry.ms_no}")</span></span>
<span id="cb21-10"><a href="#cb21-10" aria-hidden="true" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb21-11"><a href="#cb21-11" aria-hidden="true" tabindex="-1"></a><span class="co"># # Check for dates in expected range</span></span>
<span id="cb21-12"><a href="#cb21-12" aria-hidden="true" tabindex="-1"></a><span class="co"># dates = re.findall(r'\d{4}', entry.description)</span></span>
<span id="cb21-13"><a href="#cb21-13" aria-hidden="true" tabindex="-1"></a><span class="co"># for date in dates:</span></span>
<span id="cb21-14"><a href="#cb21-14" aria-hidden="true" tabindex="-1"></a><span class="co"># if not (1500 &lt;= int(date) &lt;= 1950):</span></span>
<span id="cb21-15"><a href="#cb21-15" aria-hidden="true" tabindex="-1"></a><span class="co"># warnings.append(f"Date outside expected range: {date}")</span></span>
<span id="cb21-16"><a href="#cb21-16" aria-hidden="true" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb21-17"><a href="#cb21-17" aria-hidden="true" tabindex="-1"></a><span class="co"># # Check folio format</span></span>
<span id="cb21-18"><a href="#cb21-18" aria-hidden="true" tabindex="-1"></a><span class="co"># if not re.match(r'^f+\.?\s*\d+', entry.folios, re.IGNORECASE):</span></span>
<span id="cb21-19"><a href="#cb21-19" aria-hidden="true" tabindex="-1"></a><span class="co"># warnings.append(f"Unusual folio format: {entry.folios}")</span></span>
<span id="cb21-20"><a href="#cb21-20" aria-hidden="true" tabindex="-1"></a><span class="co"># </span></span>
<span id="cb21-21"><a href="#cb21-21" aria-hidden="true" tabindex="-1"></a><span class="co"># return warnings</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="confidence-scoring" class="level3" data-number="4.6.5">
<h3 data-number="4.6.5" class="anchored" data-anchor-id="confidence-scoring"><span class="header-section-number">4.6.5</span> 4. Confidence Scoring</h3>
<p>Many VLM APIs return confidence scores or logprobs. We can use these to identify uncertain extractions.</p>
<p><strong>Pros</strong>:</p>
<ul>
<li>No additional cost or models needed</li>
<li>Can prioritize review efforts</li>
<li>Helps establish quality thresholds</li>
</ul>
<p><strong>Cons</strong>:</p>
<ul>
<li>Not all models/APIs provide confidence scores</li>
<li>High confidence doesn’t guarantee correctness</li>
<li>Requires calibration</li>
</ul>
<p><strong>Best for</strong>: Prioritizing manual review, quality-based routing, understanding model uncertainty</p>
<div id="cell-35" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb22"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb22-1"><a href="#cb22-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: If available, extract and analyze confidence scores</span></span>
<span id="cb22-2"><a href="#cb22-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Plot distribution of confidence scores</span></span>
<span id="cb22-3"><a href="#cb22-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Correlate confidence with manual evaluation results</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="combining-evaluation-approaches" class="level3" data-number="4.6.6">
<h3 data-number="4.6.6" class="anchored" data-anchor-id="combining-evaluation-approaches"><span class="header-section-number">4.6.6</span> Combining Evaluation Approaches</h3>
<p>In practice, a robust evaluation strategy uses multiple approaches:</p>
<ol type="1">
<li><strong>Start with manual ground truth</strong> on a small sample (~50-100 cards) to establish baseline accuracy</li>
<li><strong>Use consistency checks</strong> to automatically flag suspicious outputs</li>
<li><strong>Apply model-as-judge</strong> on a larger sample to monitor quality</li>
<li><strong>Prioritize review</strong> using confidence scores or validation warnings</li>
<li><strong>Monitor continuously</strong> as you process the full collection</li>
</ol>
<p>This gives you both rigorous accuracy metrics and practical quality assurance at scale.</p>
</section>
</section>
<section id="batch-processing" class="level2" data-number="4.7">
<h2 data-number="4.7" class="anchored" data-anchor-id="batch-processing"><span class="header-section-number">4.7</span> Batch Processing</h2>
<p>Now let’s process a larger batch of cards and analyze the results.</p>
<div id="cell-38" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb23"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb23-1"><a href="#cb23-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Process all available cards</span></span>
<span id="cb23-2"><a href="#cb23-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Track timing, failures, warnings</span></span>
<span id="cb23-3"><a href="#cb23-3" aria-hidden="true" tabindex="-1"></a><span class="co"># Save results to file</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
<section id="results-analysis" class="level3" data-number="4.7.1">
<h3 data-number="4.7.1" class="anchored" data-anchor-id="results-analysis"><span class="header-section-number">4.7.1</span> Results Analysis</h3>
<div id="cell-40" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb24"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb24-1"><a href="#cb24-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Analyze batch results</span></span>
<span id="cb24-2"><a href="#cb24-2" aria-hidden="true" tabindex="-1"></a><span class="co"># - Success rate</span></span>
<span id="cb24-3"><a href="#cb24-3" aria-hidden="true" tabindex="-1"></a><span class="co"># - Failed to parse rate</span></span>
<span id="cb24-4"><a href="#cb24-4" aria-hidden="true" tabindex="-1"></a><span class="co"># - Validation warnings distribution</span></span>
<span id="cb24-5"><a href="#cb24-5" aria-hidden="true" tabindex="-1"></a><span class="co"># - Processing time statistics</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
</section>
<section id="edge-cases-and-failure-modes" class="level2" data-number="4.8">
<h2 data-number="4.8" class="anchored" data-anchor-id="edge-cases-and-failure-modes"><span class="header-section-number">4.8</span> Edge Cases and Failure Modes</h2>
<p>What kinds of cards are hard for the model to process?</p>
<div id="cell-42" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb25"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb25-1"><a href="#cb25-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Examine failed/problematic extractions</span></span>
<span id="cb25-2"><a href="#cb25-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Common patterns:</span></span>
<span id="cb25-3"><a href="#cb25-3" aria-hidden="true" tabindex="-1"></a><span class="co"># - Handwritten corrections/additions</span></span>
<span id="cb25-4"><a href="#cb25-4" aria-hidden="true" tabindex="-1"></a><span class="co"># - Faded or damaged cards</span></span>
<span id="cb25-5"><a href="#cb25-5" aria-hidden="true" tabindex="-1"></a><span class="co"># - Unusual formats or layouts</span></span>
<span id="cb25-6"><a href="#cb25-6" aria-hidden="true" tabindex="-1"></a><span class="co"># - Multiple entries per card</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="export-for-cataloging" class="level2" data-number="4.9">
<h2 data-number="4.9" class="anchored" data-anchor-id="export-for-cataloging"><span class="header-section-number">4.9</span> Export for Cataloging</h2>
<p>Convert the extracted data to formats suitable for library systems.</p>
<div id="cell-44" class="cell">
<div class="code-copy-outer-scaffold"><div class="sourceCode cell-code" id="cb26"><pre class="sourceCode python code-with-copy"><code class="sourceCode python"><span id="cb26-1"><a href="#cb26-1" aria-hidden="true" tabindex="-1"></a><span class="co"># </span><span class="al">TODO</span><span class="co">: Export to CSV/JSON/XML</span></span>
<span id="cb26-2"><a href="#cb26-2" aria-hidden="true" tabindex="-1"></a><span class="co"># Consider catalog system requirements (MARC, Dublin Core, etc.)</span></span></code></pre></div><button title="Copy to Clipboard" class="code-copy-button"><i class="bi"></i></button></div>
</div>
</section>
<section id="next-steps" class="level2" data-number="4.10">
<h2 data-number="4.10" class="anchored" data-anchor-id="next-steps"><span class="header-section-number">4.10</span> Next Steps</h2>
<p>This notebook demonstrates the core extraction and evaluation workflow. For production deployment, you would need:</p>
<ol type="1">
<li><strong>Robust error handling</strong> - retry logic, fallbacks, logging</li>
<li><strong>Quality assurance workflow</strong> - human review interface for flagged items</li>
<li><strong>Batch processing infrastructure</strong> - queue management, progress tracking</li>
<li><strong>Model optimization</strong> - prompt tuning, model selection, cost optimization</li>
</ol>
<p>These production considerations are covered in the appendices and separate infrastructure documentation.</p>
</section>
<section id="key-takeaways" class="level2" data-number="4.11">
<h2 data-number="4.11" class="anchored" data-anchor-id="key-takeaways"><span class="header-section-number">4.11</span> Key Takeaways</h2>
<ol type="1">
<li><strong>Simple schemas work better</strong> - Don’t over-engineer the structure</li>
<li><strong>Multiple evaluation strategies</strong> - Combine automated and manual approaches</li>
<li><strong>Plan for failure</strong> - Build in quality flags and review workflows</li>
<li><strong>Domain expertise matters</strong> - Work closely with catalogers to define requirements</li>
<li><strong>Iterate based on results</strong> - Start small, evaluate, adjust, scale</li>
</ol>
</section>
</main> <!-- /main -->
<script id="quarto-html-after-body" type="application/javascript">
window.document.addEventListener("DOMContentLoaded", function (event) {
const icon = "";
// Configure AnchorJS (anchor placed to the right, using the `icon` string
// defined just above) and attach anchors to every `.anchored` element.
const anchorJS = new window.AnchorJS();
anchorJS.options = { placement: 'right', icon };
anchorJS.add('.anchored');
// True when the element carries at least one class whose name begins
// with "code-annotation-".
const isCodeAnnotation = (el) =>
  Array.from(el.classList).some((className) => className.startsWith('code-annotation-'));
// ClipboardJS "success" callback: gives brief visual feedback on the copy
// button that triggered the copy, then restores the button after one second.
//   e.trigger        - the button that was clicked
//   e.clearSelection - clears the text selection made for the copy
const onCopySuccess = function(e) {
  const copyButton = e.trigger;
  // don't keep focus
  copyButton.blur();
  // flash "checked" and remember the title so it can be put back
  copyButton.classList.add('code-copy-button-checked');
  const previousTitle = copyButton.getAttribute("title");
  copyButton.setAttribute("title", "Copied!");
  // when Bootstrap is loaded, also show a manually controlled tooltip
  let copiedTip;
  if (window.bootstrap) {
    const tipAttributes = [
      ["data-bs-toggle", "tooltip"],
      ["data-bs-placement", "left"],
      ["data-bs-title", "Copied!"]
    ];
    for (const [attrName, attrValue] of tipAttributes) {
      copyButton.setAttribute(attrName, attrValue);
    }
    copiedTip = new bootstrap.Tooltip(copyButton, {
      trigger: "manual",
      customClass: "code-copy-button-tooltip",
      offset: [0, -8]
    });
    copiedTip.show();
  }
  const restoreButton = function() {
    if (copiedTip) {
      copiedTip.hide();
      for (const attrName of ["data-bs-title", "data-bs-toggle", "data-bs-placement"]) {
        copyButton.removeAttribute(attrName);
      }
    }
    copyButton.setAttribute("title", previousTitle);
    copyButton.classList.remove('code-copy-button-checked');
  };
  setTimeout(restoreButton, 1000);
  // clear code selection
  e.clearSelection();
}
// Returns the text a copy button should place on the clipboard: the text of
// the <code> element found under the button's parent, with every
// code-annotation child element removed first.
//   trigger - the clicked copy button
// Works on a deep clone of the parent, so the page itself is not modified.
const getTextToCopy = function(trigger) {
  const outerScaffold = trigger.parentElement.cloneNode(true);
  const codeEl = outerScaffold.querySelector('code');
  // `children` is a live collection: removing an element while iterating it
  // directly shifts the remaining items and skips the one that follows.
  // Iterate over a snapshot so adjacent annotation elements are all removed.
  for (const childEl of Array.from(codeEl.children)) {
    if (isCodeAnnotation(childEl)) {
      childEl.remove();
    }
  }
  return codeEl.innerText;
}
// Wire ClipboardJS to the copy buttons outside the embedded-source modal and,
// when that modal element exists, a second instance scoped to the modal.
const clipboard = new window.ClipboardJS('.code-copy-button:not([data-in-quarto-modal])', {
  text: getTextToCopy
});
clipboard.on('success', onCopySuccess);
const embeddedSourceModal = window.document.getElementById('quarto-embedded-source-code-modal');
if (embeddedSourceModal) {
  const clipboardModal = new window.ClipboardJS('.code-copy-button[data-in-quarto-modal]', {
    text: getTextToCopy,
    container: embeddedSourceModal
  });
  clipboardModal.on('success', onCopySuccess);
}
// Classify links as internal or external and restore the original href of
// the external ones. A link counts as internal when its URL contains
// "/<current host>/", points at localhost, or is a mailto: link.
const localhostRegex = /^(?:http|https):\/\/localhost\:?[0-9]*\//;
const mailtoRegex = /^mailto:/;
// Escape regex metacharacters in the host before building the pattern: an
// unescaped "." matches any character (so "example.com" would also accept
// "exampleXcom"), and a bracketed IPv6 host would become a character class.
const escapedHost = window.location.host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
const filterRegex = new RegExp('/' + escapedHost + '/');
const isInternal = (href) => {
  return filterRegex.test(href) || localhostRegex.test(href) || mailtoRegex.test(href);
};
// Inspect non-navigation links and adorn them if external
const links = window.document.querySelectorAll('a[href]:not(.nav-link):not(.navbar-brand):not(.toc-action):not(.sidebar-link):not(.sidebar-item-toggle):not(.pagination-link):not(.no-external):not([aria-hidden]):not(.dropdown-item):not(.quarto-navigation-tool):not(.about-link)');
for (const link of links) {
  if (!isInternal(link.href)) {
    // undo the damage that might have been done by quarto-nav.js in the case of
    // links that we want to consider external
    if (link.dataset.originalHref !== undefined) {
      link.href = link.dataset.originalHref;
    }
  }
}
// Attaches a 'quarto'-themed tippy popup to `el`.
//   contentFn     - optional; passed to tippy as `content`
//   onTriggerFn   - optional; passed to tippy as `onTrigger`
//   onUntriggerFn - optional; passed to tippy as `onUntrigger`
// An option whose argument is falsy is left out of the config entirely.
function tippyHover(el, contentFn, onTriggerFn, onUntriggerFn) {
  const options = {
    allowHTML: true,
    maxWidth: 500,
    delay: 100,
    arrow: false,
    // mount the popup inside the parent of the element tippy hands us
    appendTo: (reference) => reference.parentElement,
    interactive: true,
    interactiveBorder: 10,
    theme: 'quarto',
    placement: 'bottom-start',
  };
  const optionalHooks = [
    ['content', contentFn],
    ['onTrigger', onTriggerFn],
    ['onUntrigger', onUntriggerFn],
  ];
  for (const [optionName, hook] of optionalHooks) {
    if (hook) {
      options[optionName] = hook;
    }
  }
  window.tippy(el, options);
}
// Footnote references: the hover popup shows the HTML of the element whose id
// the reference points to (empty when no such element exists).
const noterefs = window.document.querySelectorAll('a[role="doc-noteref"]');
for (const ref of noterefs) {
  tippyHover(ref, function() {
    // use id or data attribute instead here
    let target = ref.getAttribute('data-footnote-href') || ref.getAttribute('href');
    // reduce a parseable URL to its fragment; anything URL() rejects is kept as-is
    try { target = new URL(target).hash; } catch {}
    const noteId = target.replace(/^#\/?/, "");
    const noteEl = window.document.getElementById(noteId);
    return noteEl ? noteEl.innerHTML : "";
  });
}
// All cross-reference links (anchors with the `quarto-xref` class) on the page
const xrefs = window.document.querySelectorAll('a.quarto-xref');
// Builds the popup HTML for a cross reference.
//   id   - id of the referenced element, or null when there is none
//   note - the referenced element; it is modified in place
// Returns an HTML string. For a null id or an id starting with "sec-" only
// the first child plus the first following non-empty child are kept (when
// the element has more than two children).
const processXRef = (id, note) => {
  // Strip column container classes from the element and all descendants
  const stripColumnClz = (el) => {
    el.classList.remove("page-full", "page-columns");
    for (const child of el.children || []) {
      stripColumnClz(child);
    }
  };
  // Typeset math in `target` when Quarto exposes a typesetter
  const typeset = (target) => {
    if (window.Quarto?.typesetMath) {
      window.Quarto.typesetMath(target);
    }
  };
  stripColumnClz(note);

  const isSection = id === null || id.startsWith('sec-');
  if (!isSection) {
    // Remove any anchor links if they are present
    const anchorLink = note.querySelector('a.anchorjs-link');
    if (anchorLink) {
      anchorLink.remove();
    }
    typeset(note);
    // callouts are returned together with their own wrapper element
    return note.classList.contains("callout") ? note.outerHTML : note.innerHTML;
  }

  // Special case sections, only their first couple elements
  const container = document.createElement("div");
  if (!(note.children && note.children.length > 2)) {
    typeset(note);
    return note.innerHTML;
  }
  container.appendChild(note.children[0].cloneNode(true));
  // after the first child, take the first one that is not an empty paragraph
  const firstBody = Array.from(note.children).slice(1).find(
    (child) => !(child.tagName === "P" && child.innerText === "")
  );
  if (firstBody) {
    container.appendChild(firstBody.cloneNode(true));
  }
  typeset(container);
  return container.innerHTML;
};
// Give every cross-reference link a hover popup whose content is resolved
// when the popup is triggered: from an element in this page when the target
// id exists here, otherwise from the fetched target page.
for (var i=0; i<xrefs.length; i++) {
const xref = xrefs[i];
tippyHover(xref, undefined, function(instance) {
// Keep the popup disabled while its content is resolved; every path below
// re-enables and shows it once done.
instance.disable();
let url = xref.getAttribute('href');
let hash = undefined;
if (url.startsWith('#')) {
hash = url;
} else {
// NOTE(review): new URL(url) is called without a base, so a relative href
// (e.g. "page.html#id") throws here, leaves `hash` undefined and is handled
// by the no-hash branch below — confirm hrefs are absolute at this point.
try { hash = new URL(url).hash; } catch {}
}
if (hash) {
const id = hash.replace(/^#\/?/, "");
const note = window.document.getElementById(id);
if (note !== null) {
// Target is in this page: process a deep clone of it
try {
const html = processXRef(id, note.cloneNode(true));
instance.setContent(html);
} finally {
instance.enable();
instance.show();
}
} else {
// See if we can fetch this
fetch(url.split('#')[0])
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.getElementById(id);
if (note !== null) {
const html = processXRef(id, note);
instance.setContent(html);
}
}).finally(() => {
// runs whether or not the fetch and parse succeeded
instance.enable();
instance.show();
});
}
} else {
// See if we can fetch a full url (with no hash to target)
// This is a special case and we should probably do some content thinning / targeting
fetch(url)
.then(res => res.text())
.then(html => {
const parser = new DOMParser();
const htmlDoc = parser.parseFromString(html, "text/html");
const note = htmlDoc.querySelector('main.content');
if (note !== null) {
// This should only happen for chapter cross references
// (since there is no id in the URL)
// remove the first header
if (note.children.length > 0 && note.children[0].tagName === "HEADER") {
note.children[0].remove();
}
const html = processXRef(null, note);
instance.setContent(html);
}
}).finally(() => {
// runs whether or not the fetch and parse succeeded
instance.enable();
instance.show();
});
}
// The second callback is passed as tippyHover's onUntriggerFn and does nothing
}, function(instance) {
});
}
// The annotation element whose code lines are currently highlighted, if any
let selectedAnnoteEl;
// Builds the CSS selector for the span that marks annotation `annotation`
// inside code cell `cell`.
const selectorForAnnotation = (cell, annotation) =>
  `span[data-code-cell="${cell}"][data-code-annotation="${annotation}"]`;
// Highlights the code lines that belong to the annotation element `annoteEl`.
// The element's data-target-cell / data-target-annotation attributes locate
// the marker span, whose data-code-lines attribute lists the line numbers.
// Two absolutely positioned overlay divs (one over the code, one in the
// cell's annotation gutter) are created on first use and then sized to span
// from the first to the last listed line. Records `annoteEl` as the current
// selection.
const selectCodeLines = (annoteEl) => {
  const cellId = annoteEl.getAttribute("data-target-cell");
  const annotationId = annoteEl.getAttribute("data-target-annotation");
  const marker = window.document.querySelector(selectorForAnnotation(cellId, annotationId));
  const lineIds = marker
    .getAttribute("data-code-lines")
    .split(",")
    .map((line) => cellId + "-" + line);
  if (lineIds.length === 0) {
    return;
  }
  // Position of the first line; extended down to the last line when several
  // lines are listed
  const firstLine = window.document.getElementById(lineIds[0]);
  const top = firstLine.offsetTop;
  let height = firstLine.offsetHeight;
  const parent = firstLine.parentElement.parentElement;
  if (lineIds.length > 1) {
    const lastLine = window.document.getElementById(lineIds[lineIds.length - 1]);
    height = lastLine.offsetTop + lastLine.offsetHeight - top;
  }
  if (top !== null && height !== null && parent !== null) {
    // Look an overlay up by id, creating and attaching it when missing
    const findOrCreate = (overlayId, attach) => {
      let overlay = window.document.getElementById(overlayId);
      if (overlay === null) {
        overlay = window.document.createElement("div");
        overlay.setAttribute("id", overlayId);
        overlay.style.position = 'absolute';
        attach(overlay);
      }
      return overlay;
    };
    const highlight = findOrCreate("code-annotation-line-highlight", (overlay) => {
      parent.appendChild(overlay);
    });
    highlight.style.top = top - 2 + "px";
    highlight.style.height = height + 4 + "px";
    highlight.style.left = 0;
    const gutterHighlight = findOrCreate("code-annotation-line-highlight-gutter", (overlay) => {
      const codeCell = window.document.getElementById(cellId);
      const gutter = codeCell.querySelector('.code-annotation-gutter');
      gutter.appendChild(overlay);
    });
    gutterHighlight.style.top = top - 2 + "px";
    gutterHighlight.style.height = height + 4 + "px";
  }
  selectedAnnoteEl = annoteEl;
};
// Removes both highlight overlays (when present) and clears the current
// selection.
const unselectCodeLines = () => {
  const overlayIds = ["code-annotation-line-highlight", "code-annotation-line-highlight-gutter"];
  for (const overlayId of overlayIds) {
    const overlay = window.document.getElementById(overlayId);
    if (overlay) {
      overlay.remove();
    }
  }
  selectedAnnoteEl = undefined;
};
// Handle positioning of the toggle
// On (throttled) window resize, re-run the highlight for the selected
// annotation, if there is one.
const onWindowResize = () => {
  // NOTE(review): `elRect` is not declared anywhere in this handler, so this
  // assignment targets an outer/global variable — confirm it is still needed.
  elRect = undefined;
  if (selectedAnnoteEl) {
    selectCodeLines(selectedAnnoteEl);
  }
};
window.addEventListener("resize", throttle(onWindowResize, 10));
// Rate-limits `fn`. The first call runs immediately; every later call
// (re)schedules a single trailing run `ms` milliseconds out, replacing any run
// still pending. After a trailing run fires, the next call runs immediately
// again. The returned wrapper itself returns nothing.
function throttle(fn, ms) {
  let engaged = false;
  let pending;
  return (...args) => {
    if (engaged) {
      // all the others get throttled: restart the countdown with these args
      if (pending) clearTimeout(pending);
      pending = setTimeout(() => {
        fn.apply(this, args);
        pending = engaged = false;
      }, ms);
      return;
    }
    // first call gets through
    fn.apply(this, args);
    engaged = true;
  };
}
// Attach click handler to the DT
// Clicking an annotation highlights its code lines; clicking the annotation
// that is already selected removes the highlight again.
const annoteDls = window.document.querySelectorAll('dt[data-target-cell]');
annoteDls.forEach((annoteDlNode) => {
  annoteDlNode.addEventListener('click', (event) => {
    const clickedEl = event.target;
    if (clickedEl === selectedAnnoteEl) {
      // Unselect the line
      unselectCodeLines();
      clickedEl.classList.remove('code-annotation-active');
      return;
    }
    // Switch the selection: clear the old highlight and active marker first
    unselectCodeLines();
    const activeEl = window.document.querySelector('dt[data-target-cell].code-annotation-active');
    if (activeEl) {
      activeEl.classList.remove('code-annotation-active');
    }
    selectCodeLines(clickedEl);
    clickedEl.classList.add('code-annotation-active');
  });
});
// Walks up from `el` until it reaches an element whose parent has a non-empty
// `data-cites` value. Returns `{ el, cites }`, where `el` is the child of that
// parent on the path walked and `cites` is the value split on spaces, or
// undefined when no ancestor carries `data-cites`.
const findCites = (el) => {
  let current = el;
  while (current.parentElement) {
    const cites = current.parentElement.dataset.cites;
    if (cites) {
      return {
        el: current,
        cites: cites.split(' ')
      };
    }
    current = current.parentElement;
  }
  return undefined;
};
// Citation links: the hover popup lists the bibliography entries
// (elements with id "ref-<cite key>") for the keys found by findCites.
const bibliorefs = window.document.querySelectorAll('a[role="doc-biblioref"]');
for (const ref of bibliorefs) {
  const citeInfo = findCites(ref);
  if (!citeInfo) {
    continue;
  }
  tippyHover(citeInfo.el, function() {
    const popup = window.document.createElement('div');
    for (const cite of citeInfo.cites) {
      const citeDiv = window.document.createElement('div');
      citeDiv.classList.add('hanging-indent', 'csl-entry');
      const biblioDiv = window.document.getElementById('ref-' + cite);
      if (biblioDiv) {
        citeDiv.innerHTML = biblioDiv.innerHTML;
      }
      // an entry div is appended even when no bibliography element was found
      popup.appendChild(citeDiv);
    }
    return popup.innerHTML;
  });
}
});
</script>
<nav class="page-navigation">
<div class="nav-page nav-page-previous">
<a href="../../patterns/structured-generation/vlm-structured-generation.html" class="pagination-link" aria-label="Structured Information Extraction with Vision Language Models">
<i class="bi bi-arrow-left-short"></i> <span class="nav-page-text"><span class="chapter-number">3</span>&nbsp; <span class="chapter-title">Structured Information Extraction with Vision Language Models</span></span>
</a>
</div>
<div class="nav-page nav-page-next">
</div>
</nav>
</div> <!-- /content -->
</body></html>