Upload folder using huggingface_hub
- .gitattributes +18 -0
- assets/.DS_Store +0 -0
- assets/vllm-structured-generation/.DS_Store +0 -0
- assets/vllm-structured-generation/indexes/DSC00168.JPG +2 -2
- assets/vllm-structured-generation/indexes/DSC00169.JPG +2 -2
- assets/vllm-structured-generation/indexes/DSC00170.JPG +2 -2
- assets/vllm-structured-generation/indexes/DSC00172.JPG +2 -2
- assets/vllm-structured-generation/indexes/DSC00173.JPG +2 -2
- index.html +6 -14
- patterns/structured-generation/intro.html +55 -23
- patterns/structured-generation/vlm-structured-generation.html +0 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-11-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-15-output-1.png +2 -2
- patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-16-output-1.png +2 -2
- patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-21-output-1.png +2 -2
- patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-9-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-11-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-12-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-14-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-15-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-16-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-17-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-18-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-19-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-21-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-22-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-24-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-27-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-3-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-4-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-5-output-1.png +3 -0
- patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-9-output-1.png +3 -0
- patterns/what-is-an-ai-pattern.html +41 -16
- search.json +65 -142
- site_libs/quarto-diagram/mermaid-init.js +275 -0
- site_libs/quarto-diagram/mermaid.css +13 -0
- site_libs/quarto-diagram/mermaid.min.js +0 -0
- site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css +236 -0
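For reference, a commit like this is typically produced with the upload_folder helper from the huggingface_hub library. The sketch below is illustrative only and is not part of the committed files: the repository id, repo type, and folder path are placeholders, and the actual call that generated this commit may have used different arguments.

# Illustrative sketch only -- not part of this commit.
# repo_id, repo_type, and folder_path are placeholders.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_folder(
    folder_path="_site",                       # e.g. the rendered Quarto site
    repo_id="<user>/<repo-name>",              # placeholder repository id
    repo_type="space",                         # or "dataset" / "model" as appropriate
    commit_message="Upload folder using huggingface_hub",
)

Files matching the LFS rules in .gitattributes (the PNG and JPG entries below) are stored as Git LFS pointers rather than inline, which is why their diffs show only LFS details.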
.gitattributes
CHANGED
@@ -82,3 +82,21 @@ patterns/structured-generation/advisor-index-cards_files/figure-html/cell-17-out
 patterns/structured-generation/advisor-index-cards_files/figure-html/cell-17-output-5.png filter=lfs diff=lfs merge=lfs -text
 patterns/structured-generation/advisor-index-cards_files/figure-html/cell-17-output-6.png filter=lfs diff=lfs merge=lfs -text
 patterns/structured-generation/advisor-index-cards_files/figure-html/cell-3-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-11-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-9-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-11-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-12-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-14-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-15-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-16-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-17-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-18-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-19-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-21-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-22-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-24-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-27-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-3-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-4-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-5-output-1.png filter=lfs diff=lfs merge=lfs -text
+patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-9-output-1.png filter=lfs diff=lfs merge=lfs -text
assets/.DS_Store
ADDED
Binary file (8.2 kB)

assets/vllm-structured-generation/.DS_Store
ADDED
Binary file (8.2 kB)
assets/vllm-structured-generation/indexes/DSC00168.JPG
CHANGED
Git LFS Details

assets/vllm-structured-generation/indexes/DSC00169.JPG
CHANGED
Git LFS Details

assets/vllm-structured-generation/indexes/DSC00170.JPG
CHANGED
Git LFS Details

assets/vllm-structured-generation/indexes/DSC00172.JPG
CHANGED
Git LFS Details

assets/vllm-structured-generation/indexes/DSC00173.JPG
CHANGED
Git LFS Details
index.html
CHANGED
@@ -2,12 +2,12 @@
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

 <meta charset="utf-8">
-<meta name="generator" content="quarto-1.8.
+<meta name="generator" content="quarto-1.8.27">

 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

 <meta name="author" content="Daniel van Strien">
-<meta name="dcterms.date" content="
+<meta name="dcterms.date" content="2026-02-16">

 <title>AI Design Patterns for GLAM</title>
 <style>
@@ -40,7 +40,7 @@
 <script src="site_libs/quarto-html/tippy.umd.min.js"></script>
 <script src="site_libs/quarto-html/anchor.min.js"></script>
 <link href="site_libs/quarto-html/tippy.css" rel="stylesheet">
-<link href="site_libs/quarto-html/quarto-syntax-highlighting-
+<link href="site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css" rel="stylesheet" id="quarto-text-highlighting-styles">
 <script src="site_libs/bootstrap/bootstrap.min.js"></script>
 <link href="site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
 <link href="site_libs/bootstrap/bootstrap-27c261d06b905028a18691de25d09dde.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
@@ -97,9 +97,7 @@
 <div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
 <!-- sidebar -->
 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
-<div class="pt-lg-2 mt-2 text-left sidebar-header
-<a href="./index.html" class="sidebar-logo-link">
-</a>
+<div class="pt-lg-2 mt-2 text-left sidebar-header">
 <div class="sidebar-title mb-0 py-0">
 <a href="./">AI Design Patterns for GLAM</a>
 </div>
@@ -150,7 +148,7 @@
 <li class="sidebar-item">
 <div class="sidebar-item-container">
 <a href="./patterns/structured-generation/intro.html" class="sidebar-item-text sidebar-link">
-<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">
+<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></span></a>
 </div>
 </li>
 <li class="sidebar-item">
@@ -158,12 +156,6 @@
 <a href="./patterns/structured-generation/vlm-structured-generation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">3</span> <span class="chapter-title">Structured Information Extraction with Vision Language Models</span></span></a>
 </div>
-</li>
-<li class="sidebar-item">
-<div class="sidebar-item-container">
-<a href="./patterns/structured-generation/advisor-index-cards.html" class="sidebar-item-text sidebar-link">
-<span class="menu-text"><span class="chapter-number">4</span> <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></span></a>
-</div>
 </li>
 </ul>
 </li>
@@ -213,7 +205,7 @@
 <div>
 <div class="quarto-title-meta-heading">Published</div>
 <div class="quarto-title-meta-contents">
-<p class="date">
+<p class="date">February 16, 2026</p>
 </div>
 </div>
patterns/structured-generation/intro.html
CHANGED
@@ -2,12 +2,12 @@
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

 <meta charset="utf-8">
-<meta name="generator" content="quarto-1.8.
+<meta name="generator" content="quarto-1.8.27">

 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">


-<title>2
+<title>2 Structured Document Processing – AI Design Patterns for GLAM</title>
 <style>
 code{white-space: pre-wrap;}
 span.smallcaps{font-variant: small-caps;}
@@ -39,7 +39,7 @@
 <script src="../../site_libs/quarto-html/tippy.umd.min.js"></script>
 <script src="../../site_libs/quarto-html/anchor.min.js"></script>
 <link href="../../site_libs/quarto-html/tippy.css" rel="stylesheet">
-<link href="../../site_libs/quarto-html/quarto-syntax-highlighting-
+<link href="../../site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css" rel="stylesheet" id="quarto-text-highlighting-styles">
 <script src="../../site_libs/bootstrap/bootstrap.min.js"></script>
 <link href="../../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
 <link href="../../site_libs/bootstrap/bootstrap-27c261d06b905028a18691de25d09dde.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
@@ -70,6 +70,9 @@
 "search-label": "Search"
 }
 }</script>
+<script src="../../site_libs/quarto-diagram/mermaid.min.js"></script>
+<script src="../../site_libs/quarto-diagram/mermaid-init.js"></script>
+<link href="../../site_libs/quarto-diagram/mermaid.css" rel="stylesheet">


 </head>
@@ -83,7 +86,7 @@
 <button type="button" class="quarto-btn-toggle btn" data-bs-toggle="collapse" role="button" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
 <i class="bi bi-layout-text-sidebar-reverse"></i>
 </button>
-<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html"><span class="chapter-number">2</span> <span class="chapter-title">
+<nav class="quarto-page-breadcrumbs" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></a></li></ol></nav>
 <a class="flex-grow-1" role="navigation" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item" aria-controls="quarto-sidebar" aria-expanded="false" aria-label="Toggle sidebar navigation" onclick="if (window.quartoToggleHeadroom) { window.quartoToggleHeadroom(); }">
 </a>
 <button type="button" class="btn quarto-search-button" aria-label="Search" onclick="window.quartoOpenSearch();">
@@ -96,9 +99,7 @@
 <div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
 <!-- sidebar -->
 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
-<div class="pt-lg-2 mt-2 text-left sidebar-header
-<a href="../../index.html" class="sidebar-logo-link">
-</a>
+<div class="pt-lg-2 mt-2 text-left sidebar-header">
 <div class="sidebar-title mb-0 py-0">
 <a href="../../">AI Design Patterns for GLAM</a>
 </div>
@@ -149,7 +150,7 @@
 <li class="sidebar-item">
 <div class="sidebar-item-container">
 <a href="../../patterns/structured-generation/intro.html" class="sidebar-item-text sidebar-link active">
-<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">
+<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></span></a>
 </div>
 </li>
 <li class="sidebar-item">
@@ -157,12 +158,6 @@
 <a href="../../patterns/structured-generation/vlm-structured-generation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">3</span> <span class="chapter-title">Structured Information Extraction with Vision Language Models</span></span></a>
 </div>
-</li>
-<li class="sidebar-item">
-<div class="sidebar-item-container">
-<a href="../../patterns/structured-generation/advisor-index-cards.html" class="sidebar-item-text sidebar-link">
-<span class="menu-text"><span class="chapter-number">4</span> <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></span></a>
-</div>
 </li>
 </ul>
 </li>
@@ -180,17 +175,18 @@
 <li><a href="#dont-we-just-need-ocr" id="toc-dont-we-just-need-ocr" class="nav-link" data-scroll-target="#dont-we-just-need-ocr"><span class="header-section-number">2.2</span> Don’t we just need OCR?</a></li>
 <li><a href="#solution-overview" id="toc-solution-overview" class="nav-link" data-scroll-target="#solution-overview"><span class="header-section-number">2.3</span> Solution Overview</a>
 <ul class="collapse">
-<li><a href="#what-this-pattern-looks-like" id="toc-what-this-pattern-looks-like" class="nav-link" data-scroll-target="#what-this-pattern-looks-like"><span class="header-section-number">2.3.1</span> What this pattern looks like
+<li><a href="#what-this-pattern-looks-like" id="toc-what-this-pattern-looks-like" class="nav-link" data-scroll-target="#what-this-pattern-looks-like"><span class="header-section-number">2.3.1</span> What this pattern looks like</a></li>
 </ul></li>
+<li><a href="#when-to-use-this-pattern" id="toc-when-to-use-this-pattern" class="nav-link" data-scroll-target="#when-to-use-this-pattern"><span class="header-section-number">2.4</span> When to Use This Pattern</a></li>
 </ul>
 </nav>
 </div>
 <!-- main -->
 <main class="content" id="quarto-document-content">

-<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html"><span class="chapter-number">2</span> <span class="chapter-title">
+<header id="title-block-header" class="quarto-title-block default"><nav class="quarto-page-breadcrumbs quarto-title-breadcrumbs d-none d-lg-block" aria-label="breadcrumb"><ol class="breadcrumb"><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html">Structured Information Extraction</a></li><li class="breadcrumb-item"><a href="../../patterns/structured-generation/intro.html"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></a></li></ol></nav>
 <div class="quarto-title">
-<h1 class="title"><span class="chapter-number">2</span> <span class="chapter-title">
+<h1 class="title"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></h1>
 </div>


@@ -209,21 +205,57 @@

 <section id="the-challenge" class="level2" data-number="2.1">
 <h2 data-number="2.1" class="anchored" data-anchor-id="the-challenge"><span class="header-section-number">2.1</span> The Challenge</h2>
-<p>Many GLAM institutions have vast collections of structured documents
+<p>Many GLAM institutions have vast collections of structured documents—index cards, forms, registers—containing valuable information locked in physical or image formats. Manual transcription doesn’t scale, but the structured nature of these documents makes them ideal candidates for AI-powered processing.</p>
+<p>Unlocking this data means better discovery, new research possibilities, and integration with modern cataloguing systems.</p>
 </section>
 <section id="dont-we-just-need-ocr" class="level2" data-number="2.2">
 <h2 data-number="2.2" class="anchored" data-anchor-id="dont-we-just-need-ocr"><span class="header-section-number">2.2</span> Don’t we just need OCR?</h2>
-
+<p>Traditional OCR extracts text from images, but that’s only half the problem. Consider an index card with a name, date, reference number, and description arranged in specific positions. OCR gives you a block of text—but not which part is the name, which is the date, or how they relate.</p>
+<p>Often, you don’t even need the raw text—you need the <em>information</em> it contains. A catalogue record doesn’t need “Mr. John Smith, 1847” preserved exactly; it needs <code>name: "John Smith"</code> and <code>year: 1847</code> as usable data.</p>
+<p>With OCR alone, you still need someone to parse text into structured fields. For hundreds of documents, that’s manageable. For hundreds of thousands, it’s not.</p>
 </section>
 <section id="solution-overview" class="level2" data-number="2.3">
 <h2 data-number="2.3" class="anchored" data-anchor-id="solution-overview"><span class="header-section-number">2.3</span> Solution Overview</h2>
-<p>
+<p>Structured extraction is a pattern that works across modalities—text, images, audio transcripts. The core idea is the same: constrain a model to return data in a predefined schema rather than freeform text.</p>
+<p>For document images, we use Vision Language Models (VLMs). Unlike OCR, VLMs understand both visual layout and textual content together. They can see that “1847” appears in the date field position, not just that the characters “1847” exist somewhere on the page.</p>
+<p>Structured output generation constrains the model to return your fields, your format. The result: input in, structured JSON out.</p>
+<p>This section focuses on the image case—extracting from document images—but the same principles apply when working with text or other formats.</p>
 <section id="what-this-pattern-looks-like" class="level3" data-number="2.3.1">
-<h3 data-number="2.3.1" class="anchored" data-anchor-id="what-this-pattern-looks-like"><span class="header-section-number">2.3.1</span> What this pattern looks like
-
+<h3 data-number="2.3.1" class="anchored" data-anchor-id="what-this-pattern-looks-like"><span class="header-section-number">2.3.1</span> What this pattern looks like</h3>
+<div class="cell" data-layout-align="default">
+<div class="cell-output-display">
+<div>
+<p></p><figure class="figure"><p></p>
+<div>
+<pre class="mermaid mermaid-js">flowchart LR
+A[Document Image] --> B[VLM + Schema]
+B --> C[Structured JSON]
+C --> D[Catalogue/Database]
+</pre>
+</div>
+<p></p></figure><p></p>
+</div>
+</div>
+</div>
+<p>The following chapters walk through this in detail—starting with basic VLM queries, then building to real extraction workflows with evaluation strategies.</p>
+</section>
+</section>
+<section id="when-to-use-this-pattern" class="level2" data-number="2.4">
+<h2 data-number="2.4" class="anchored" data-anchor-id="when-to-use-this-pattern"><span class="header-section-number">2.4</span> When to Use This Pattern</h2>
+<p><strong>Good fit:</strong></p>
+<ul>
+<li>Forms, index cards, registers with consistent layouts</li>
+<li>Documents where you know what fields you want to extract</li>
+<li>Collections too large for manual transcription</li>
+</ul>
+<p><strong>Less suited:</strong></p>
+<ul>
+<li>Free-form manuscripts with no predictable structure</li>
+<li>Documents requiring deep contextual interpretation</li>
+<li>Cases where verbatim transcription is the goal (use OCR instead)</li>
+</ul>


-</section>
 </section>

 </main> <!-- /main -->
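The "VLM + Schema" step in the diagram added above can be sketched in a few lines of Python. This is an illustrative sketch, not code taken from the committed notebooks: it assumes an OpenAI-compatible endpoint such as the LM Studio server used in the following chapter, and the IndexCard fields, image path, and model id are placeholders invented for the example.

# Illustrative sketch only -- not taken from the committed notebooks.
# Assumes a local OpenAI-compatible server (e.g. LM Studio on port 1234);
# the IndexCard fields, image path, and model id are placeholders.
import base64

from openai import OpenAI
from pydantic import BaseModel


class IndexCard(BaseModel):
    name: str
    year: int | None = None
    reference_number: str | None = None
    description: str | None = None


client = OpenAI(base_url="http://localhost:1234/v1", api_key="lm-studio")

with open("card.jpg", "rb") as f:  # a scanned index card image
    image_b64 = base64.b64encode(f.read()).decode()

response = client.chat.completions.create(
    model="qwen/qwen2.5-vl-7b",  # any vision-capable model served locally
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "Extract the catalogue fields from this card."},
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/jpeg;base64,{image_b64}"},
                },
            ],
        }
    ],
    # Ask the server to constrain the output to the schema
    # (structured-output support varies by server and model).
    response_format={
        "type": "json_schema",
        "json_schema": {"name": "IndexCard", "schema": IndexCard.model_json_schema()},
    },
)

card = IndexCard.model_validate_json(response.choices[0].message.content)
print(card)

The validated object can then be loaded straight into a catalogue or database, which is the final step shown in the diagram.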
patterns/structured-generation/vlm-structured-generation.html
CHANGED
The diff for this file is too large to render.
patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-11-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-15-output-1.png
CHANGED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-16-output-1.png
CHANGED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-21-output-1.png
CHANGED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-html/cell-9-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-11-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-12-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-14-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-15-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-16-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-17-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-18-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-19-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-21-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-22-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-24-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-27-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-3-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-4-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-5-output-1.png
ADDED
Git LFS Details

patterns/structured-generation/vlm-structured-generation_files/figure-pdf/cell-9-output-1.png
ADDED
Git LFS Details
patterns/what-is-an-ai-pattern.html
CHANGED
@@ -2,7 +2,7 @@
 <html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en"><head>

 <meta charset="utf-8">
-<meta name="generator" content="quarto-1.8.
+<meta name="generator" content="quarto-1.8.27">

 <meta name="viewport" content="width=device-width, initial-scale=1.0, user-scalable=yes">

@@ -39,7 +39,7 @@
 <script src="../site_libs/quarto-html/tippy.umd.min.js"></script>
 <script src="../site_libs/quarto-html/anchor.min.js"></script>
 <link href="../site_libs/quarto-html/tippy.css" rel="stylesheet">
-<link href="../site_libs/quarto-html/quarto-syntax-highlighting-
+<link href="../site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css" rel="stylesheet" id="quarto-text-highlighting-styles">
 <script src="../site_libs/bootstrap/bootstrap.min.js"></script>
 <link href="../site_libs/bootstrap/bootstrap-icons.css" rel="stylesheet">
 <link href="../site_libs/bootstrap/bootstrap-27c261d06b905028a18691de25d09dde.min.css" rel="stylesheet" append-hash="true" id="quarto-bootstrap" data-mode="light">
@@ -96,9 +96,7 @@
 <div id="quarto-content" class="quarto-container page-columns page-rows-contents page-layout-article">
 <!-- sidebar -->
 <nav id="quarto-sidebar" class="sidebar collapse collapse-horizontal quarto-sidebar-collapse-item sidebar-navigation floating overflow-auto">
-<div class="pt-lg-2 mt-2 text-left sidebar-header
-<a href="../index.html" class="sidebar-logo-link">
-</a>
+<div class="pt-lg-2 mt-2 text-left sidebar-header">
 <div class="sidebar-title mb-0 py-0">
 <a href="../">AI Design Patterns for GLAM</a>
 </div>
@@ -149,7 +147,7 @@
 <li class="sidebar-item">
 <div class="sidebar-item-container">
 <a href="../patterns/structured-generation/intro.html" class="sidebar-item-text sidebar-link">
-<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">
+<span class="menu-text"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></span></a>
 </div>
 </li>
 <li class="sidebar-item">
@@ -157,12 +155,6 @@
 <a href="../patterns/structured-generation/vlm-structured-generation.html" class="sidebar-item-text sidebar-link">
 <span class="menu-text"><span class="chapter-number">3</span> <span class="chapter-title">Structured Information Extraction with Vision Language Models</span></span></a>
 </div>
-</li>
-<li class="sidebar-item">
-<div class="sidebar-item-container">
-<a href="../patterns/structured-generation/advisor-index-cards.html" class="sidebar-item-text sidebar-link">
-<span class="menu-text"><span class="chapter-number">4</span> <span class="chapter-title">Practical Application: Advisor Index Card Extraction</span></span></a>
-</div>
 </li>
 </ul>
 </li>
@@ -171,8 +163,16 @@
 </nav>
 <div id="quarto-sidebar-glass" class="quarto-sidebar-collapse-item" data-bs-toggle="collapse" data-bs-target=".quarto-sidebar-collapse-item"></div>
 <!-- margin-sidebar -->
-<div id="quarto-margin-sidebar" class="sidebar margin-sidebar
-
+<div id="quarto-margin-sidebar" class="sidebar margin-sidebar">
+<nav id="TOC" role="doc-toc" class="toc-active">
+<h2 id="toc-title">Table of contents</h2>
+
+<ul>
+<li><a href="#why-patterns" id="toc-why-patterns" class="nav-link active" data-scroll-target="#why-patterns"><span class="header-section-number">1.1</span> Why Patterns?</a></li>
+<li><a href="#anatomy-of-a-pattern" id="toc-anatomy-of-a-pattern" class="nav-link" data-scroll-target="#anatomy-of-a-pattern"><span class="header-section-number">1.2</span> Anatomy of a Pattern</a></li>
+<li><a href="#patterns-in-this-book" id="toc-patterns-in-this-book" class="nav-link" data-scroll-target="#patterns-in-this-book"><span class="header-section-number">1.3</span> Patterns in This Book</a></li>
+</ul>
+</nav>
 </div>
 <!-- main -->
 <main class="content" id="quarto-document-content">
@@ -196,8 +196,33 @@
 </header>


+<p>A pattern is a reusable solution to a commonly occurring problem. The concept comes from architecture—Christopher Alexander’s work on design patterns—and was later adopted by software engineering. In this book, we apply the same idea to AI implementations in GLAM contexts.</p>
+<section id="why-patterns" class="level2" data-number="1.1">
+<h2 data-number="1.1" class="anchored" data-anchor-id="why-patterns"><span class="header-section-number">1.1</span> Why Patterns?</h2>
+<p>AI and machine learning are evolving rapidly. The models, APIs, and frameworks we use today will be superseded—often within months. But the underlying problems—extracting structured data from historical documents, assessing condition at scale, making collections discoverable—persist.</p>
+<p>Patterns help us in three ways:</p>
+<p><strong>They’re technology-agnostic.</strong> A pattern describes <em>what</em> problem you’re solving and <em>why</em> an approach works, not just <em>which</em> model to use. When better models emerge, the pattern still applies.</p>
+<p><strong>They’re communicable.</strong> Patterns give teams a shared vocabulary. Saying “we’re using a structured extraction pattern” conveys more than listing the specific models and APIs involved.</p>
+<p><strong>They’re adaptable.</strong> The same pattern can be implemented differently depending on your constraints—budget, infrastructure, staff expertise, risk tolerance.</p>
+</section>
+<section id="anatomy-of-a-pattern" class="level2" data-number="1.2">
+<h2 data-number="1.2" class="anchored" data-anchor-id="anatomy-of-a-pattern"><span class="header-section-number">1.2</span> Anatomy of a Pattern</h2>
+<p>Each pattern in this book follows a consistent structure:</p>
+<p><strong>The Challenge</strong> What recurring problem does this pattern address? What makes it difficult or impossible to solve with traditional approaches?</p>
+<p><strong>Solution Overview</strong> The high-level approach. What makes this work? What are the key components?</p>
+<p><strong>Implementation</strong> Technical walkthrough with working code. We use real examples from GLAM collections, not toy datasets.</p>
+<p><strong>Considerations</strong> When should you use this pattern? What are the tradeoffs? What might go wrong?</p>
+</section>
+<section id="patterns-in-this-book" class="level2" data-number="1.3">
+<h2 data-number="1.3" class="anchored" data-anchor-id="patterns-in-this-book"><span class="header-section-number">1.3</span> Patterns in This Book</h2>
+<p>This book currently covers:</p>
+<ul>
+<li><strong>Structured Information Extraction</strong> — Using Vision Language Models to extract structured metadata from document images (index cards, forms, registers)</li>
+</ul>
+<p>Additional patterns will be added as the book develops.</p>


+</section>

 </main> <!-- /main -->
 <script id="quarto-html-after-body" type="application/javascript">
@@ -607,8 +632,8 @@
 </a>
 </div>
 <div class="nav-page nav-page-next">
-<a href="../patterns/structured-generation/intro.html" class="pagination-link" aria-label="
-<span class="nav-page-text"><span class="chapter-number">2</span> <span class="chapter-title">
+<a href="../patterns/structured-generation/intro.html" class="pagination-link" aria-label="Structured Document Processing">
+<span class="nav-page-text"><span class="chapter-number">2</span> <span class="chapter-title">Structured Document Processing</span></span> <i class="bi bi-arrow-right-short"></i>
 </a>
 </div>
 </nav>
search.json
CHANGED
@@ -29,37 +29,81 @@
 "Welcome"
 ]
 },
 {
 "objectID": "patterns/structured-generation/intro.html",
 "href": "patterns/structured-generation/intro.html",
-"title": "2
 "section": "",
-"text": "2.1 The Challenge\nMany GLAM institutions have vast collections of structured documents
 "crumbs": [
 "Structured Information Extraction",
-"<span class='chapter-number'>2</span> <span class='chapter-title'>
 ]
 },
 {
 "objectID": "patterns/structured-generation/intro.html#dont-we-just-need-ocr",
 "href": "patterns/structured-generation/intro.html#dont-we-just-need-ocr",
-"title": "2
 "section": "2.2 Don’t we just need OCR?",
-"text": "2.2 Don’t we just need OCR
 "crumbs": [
 "Structured Information Extraction",
-"<span class='chapter-number'>2</span> <span class='chapter-title'>
 ]
 },
 {
 "objectID": "patterns/structured-generation/intro.html#solution-overview",
 "href": "patterns/structured-generation/intro.html#solution-overview",
-"title": "2
 "section": "2.3 Solution Overview",
-"text": "2.3 Solution Overview\
 "crumbs": [
 "Structured Information Extraction",
-"<span class='chapter-number'>2</span> <span class='chapter-title'>
 ]
 },
 {
@@ -67,7 +111,7 @@
 "href": "patterns/structured-generation/vlm-structured-generation.html",
 "title": "3 Structured Information Extraction with Vision Language Models",
 "section": "",
-"text": "3.1 Introduction\nIn this chapter we’ll start to look at how we can use Visual Language Models (VLMs) to extract structured information from images of documents.\nWe already saw what this looked like at a conceptual level in the previous chapter. In this chapter we’ll get hands on with some code examples to illustrate how this can be done in practice. To start we’ll focus on some
 "crumbs": [
 "Structured Information Extraction",
 "<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
@@ -89,7 +133,7 @@
 "href": "patterns/structured-generation/vlm-structured-generation.html#setup",
 "title": "3 Structured Information Extraction with Vision Language Models",
 "section": "3.3 Setup",
-
"text": "3.3 Setup\n\n3.3.1 Start LM Studio\n\nWe’ll use LM Studio for this notebook. Since we’ll be using the OpenAI Python client to interact with models run by LM Studio it will be fairly easy to switch to a different model/tool for running the models since many tools have an OpenAI compatible endpoint.\n\n\n\n\n\n\nIf you haven’t already, make sure to install LM Studio by following the instructions on the LM Studio website.\n\n\n\nWhile LM Studio is primarily known as a GUI tool for interacting with local LLMs, it also includes a built-in API server that is compatible with the OpenAI API. This allows us to use the same code we would use for OpenAI hosted models to interact with local models running in LM Studio.\nLM Studio has a command line interface (CLI) that we can use to start the server. We can check that the lms command is available here:\n\n!lms\n\n\n __ __ ___ ______ ___ _______ ____\n\n / / / |/ / / __/ /___ _____/ (_)__ / ___/ / / _/\n\n / /__/ /|_/ / _\\ \\/ __/ // / _ / / _ \\ / /__/ /___/ / \n\n/____/_/ /_/ /___/\\__/\\_,_/\\_,_/_/\\___/ \\___/____/___/ \n\n\n\nlms - LM Studio CLI - v0.0.47\n\nGitHub: https://github.com/lmstudio-ai/lms\n\n\n\nUsage\n\nUsage: lms [options] [command]\n\n\n\nLM Studio CLI\n\n\n\nOptions:\n\n -h, --help display help for command\n\n\n\nManage Models:\n\n get Searching and downloading a model from online.\n\n import Import a model file into LM Studio\n\n ls List all downloaded models\n\n\n\nUse Models:\n\n chat Open an interactive chat with the currently loaded model.\n\n load Load a model\n\n ps List all loaded models\n\n server Commands for managing the local server\n\n unload Unload a model\n\n\n\nDevelop & Publish Artifacts:\n\n clone Clone an artifact from LM Studio Hub to a local folder.\n\n create Create a new project with scaffolding\n\n dev Starts the development server for the plugin in the current folder.\n\n login Authenticate with LM Studio\n\n push Uploads the plugin in the current folder to LM Studio Hub.\n\n\n\nSystem Management:\n\n bootstrap Bootstrap the CLI\n\n flags Set or get experiment flags\n\n log Log operations. Currently only supports streaming logs from LM Studio via `lms log\n\n stream`\n\n runtime Manage runtime engines\n\n status Prints the status of LM Studio\n\n version Prints the version of the CLI\n\n\n\nCommands:\n\n help display help for command\n\n\n\n\nWe can check that LM Studio server is running by using the lms server start command.\n\n!lms server start\n\nSuccess! Server is now running on port 1234\n\n\n\n\n3.3.2 Connect to LM Studio\nWe can use the OpenAI Python client (TODO add link), to connect to LM studio. By default LM studio is running on port 1234 on localhost so we can connect to it here. The default api_key is lm-studio.\n\nfrom openai import OpenAI\n\nclient = OpenAI(\n base_url=\"http://localhost:1234/v1\",\n api_key=\"lm-studio\"\n)\n\nWe can use various different methods with the client, for example we can access the models available:\n\nfrom rich import print as rprint\nmodels = client.models.list()\nrprint(f\"Connected. Models: {[m.id for m in models.data]}\")\n\nConnected. 
Models: ['qwen3-vl-2b-instruct-mlx', 'qwen/qwen2.5-vl-7b', 'qwen/qwen3-vl-8b', 'qwen/qwen3-vl-4b', \n'text-embedding-nomic-embed-text-v1.5', 'qwen3-vl-30b-a3b-instruct', 'qwen3-vl-30b-a3b-thinking@4bit', \n'qwen3-vl-30b-a3b-thinking@3bit', 'qwen/qwen3-4b-thinking-2507', 'google/gemma-3-12b', 'google/gemma-3-4b', \n'qwen2-0.5b-instruct-fingreylit', 'google/gemma-3n-e4b', 'granite-vision-3.3-2b', 'ibm/granite-4-h-tiny', \n'iconclass-vlm', 'mlx-community/qwen2.5-vl-3b-instruct', 'lmstudio-community/qwen2.5-vl-3b-instruct', \n'lfm2-vl-1.6b', 'mimo-vl-7b-rl-2508@q4_k_s', 'mimo-vl-7b-rl-2508@q8_0', 'qwen3-30b-a3b-instruct-2507', \n'qwen3-4b-instruct-2507-mlx', 'openai/gpt-oss-20b', 'mistralai/mistral-small-3.2', \n'qwen3-30b-a3b-instruct-2507-mlx', 'liquid/lfm2-1.2b', 'smollm3-3b-mlx', 'unsloth/smollm3-3b', \n'ggml-org/smollm3-3b', 'mlx-community/smollm3-3b']\n\n\n\n\n\n\n\n\n\n\nNoteAlternative: Using Hugging Face Inference Providers\n\n\n\n\n\nThis is WIP. TODO add an alternative path that doesn’t require any local model hosting using Inference Providers instead. sl\n\nfrom huggingface_hub import list_models\nfrom rich import print as rprint\nvlm_models = list(list_models(filter=\"image-text-to-text\", inference_provider='all', sort='trending_score', expand=['safetensors']))\nrprint(vlm_models[:5])\nvlm_models[0].safetensors.total\n\n[\n ModelInfo(\n id='Qwen/Qwen3-VL-8B-Instruct',\n author=None,\n sha=None,\n created_at=None,\n last_modified=None,\n private=None,\n disabled=None,\n downloads=None,\n downloads_all_time=None,\n gated=None,\n gguf=None,\n inference=None,\n inference_provider_mapping=None,\n likes=None,\n library_name=None,\n tags=None,\n pipeline_tag=None,\n mask_token=None,\n card_data=None,\n widget_data=None,\n model_index=None,\n config=None,\n transformers_info=None,\n trending_score=116,\n siblings=None,\n spaces=None,\n safetensors=SafeTensorsInfo(parameters={'BF16': 8767123696}, total=8767123696),\n security_repo_status=None,\n xet_enabled=None\n ),\n ModelInfo(\n id='Qwen/Qwen3-VL-30B-A3B-Instruct',\n author=None,\n sha=None,\n created_at=None,\n last_modified=None,\n private=None,\n disabled=None,\n downloads=None,\n downloads_all_time=None,\n gated=None,\n gguf=None,\n inference=None,\n inference_provider_mapping=None,\n likes=None,\n library_name=None,\n tags=None,\n pipeline_tag=None,\n mask_token=None,\n card_data=None,\n widget_data=None,\n model_index=None,\n config=None,\n transformers_info=None,\n trending_score=38,\n siblings=None,\n spaces=None,\n safetensors=SafeTensorsInfo(parameters={'BF16': 31070754032}, total=31070754032),\n security_repo_status=None,\n xet_enabled=None\n ),\n ModelInfo(\n id='Qwen/Qwen2.5-VL-7B-Instruct',\n author=None,\n sha=None,\n created_at=None,\n last_modified=None,\n private=None,\n disabled=None,\n downloads=None,\n downloads_all_time=None,\n gated=None,\n gguf=None,\n inference=None,\n inference_provider_mapping=None,\n likes=None,\n library_name=None,\n tags=None,\n pipeline_tag=None,\n mask_token=None,\n card_data=None,\n widget_data=None,\n model_index=None,\n config=None,\n transformers_info=None,\n trending_score=16,\n siblings=None,\n spaces=None,\n safetensors=SafeTensorsInfo(parameters={'BF16': 8292166656}, total=8292166656),\n security_repo_status=None,\n xet_enabled=None\n ),\n ModelInfo(\n id='google/gemma-3-27b-it',\n author=None,\n sha=None,\n created_at=None,\n last_modified=None,\n private=None,\n disabled=None,\n downloads=None,\n downloads_all_time=None,\n gated=None,\n gguf=None,\n inference=None,\n 
inference_provider_mapping=None,\n likes=None,\n library_name=None,\n tags=None,\n pipeline_tag=None,\n mask_token=None,\n card_data=None,\n widget_data=None,\n model_index=None,\n config=None,\n transformers_info=None,\n trending_score=16,\n siblings=None,\n spaces=None,\n safetensors=SafeTensorsInfo(parameters={'BF16': 27432406640}, total=27432406640),\n security_repo_status=None,\n xet_enabled=None\n ),\n ModelInfo(\n id='Qwen/Qwen3-VL-30B-A3B-Thinking',\n author=None,\n sha=None,\n created_at=None,\n last_modified=None,\n private=None,\n disabled=None,\n downloads=None,\n downloads_all_time=None,\n gated=None,\n gguf=None,\n inference=None,\n inference_provider_mapping=None,\n likes=None,\n library_name=None,\n tags=None,\n pipeline_tag=None,\n mask_token=None,\n card_data=None,\n widget_data=None,\n model_index=None,\n config=None,\n transformers_info=None,\n trending_score=14,\n siblings=None,\n spaces=None,\n safetensors=SafeTensorsInfo(parameters={'BF16': 31070754032}, total=31070754032),\n security_repo_status=None,\n xet_enabled=None\n )\n]\n\n\n\n8767123696\n\n\n\nmax_params = 8767123696 # ~8.7B parameters\nvlm_models = list_models(filter=\"image-text-to-text\", inference_provider='all', sort='trending_score', expand=['safetensors'])\nvlm_models_small = [m for m in vlm_models if m.safetensors and m.safetensors.total <= max_params]\n[m.id for m in vlm_models_small]\n\n['Qwen/Qwen3-VL-8B-Instruct',\n 'Qwen/Qwen2.5-VL-7B-Instruct',\n 'xtuner/llava-llama-3-8b-v1_1',\n 'TheFinAI/StockLLM',\n 'dinalad0/my-LLM_RAG-model']\n\n\n\nimport os\nfrom openai import OpenAI\nfrom dotenv import load_dotenv\nload_dotenv()\n\nhf_client = OpenAI(\n base_url=\"https://router.huggingface.co/v1\",\n api_key=os.environ[\"HF_TOKEN\"],\n)\n\ncompletion = hf_client.chat.completions.create(\n model=\"Qwen/Qwen3-VL-8B-Instruct\",\n messages=[\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": \"Describe this image in one sentence.\"\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\n \"url\": \"https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg\"\n }\n }\n ]\n }\n ],\n)\n\nprint(completion.choices[0].message)\n\nChatCompletionMessage(content='The Statue of Liberty stands tall on her island in New York Harbor, with the iconic Manhattan skyline rising majestically behind her under a clear blue sky.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None)",
|
| 93 |
"crumbs": [
|
| 94 |
"Structured Information Extraction",
|
| 95 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
@@ -100,7 +144,7 @@
|
|
| 100 |
"href": "patterns/structured-generation/vlm-structured-generation.html#basic-vlm-query",
|
| 101 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 102 |
"section": "3.4 Basic VLM Query",
|
| 103 |
-
"text": "3.4 Basic VLM Query\nLet’s start by defining a simple function that we can use to query a VLM with an image and a prompt. This function will handle converting the image to base64 and sending the request to the model.\
|
| 104 |
"crumbs": [
|
| 105 |
"Structured Information Extraction",
|
| 106 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
@@ -111,7 +155,7 @@
|
|
| 111 |
"href": "patterns/structured-generation/vlm-structured-generation.html#simple-vlm-query-example",
|
| 112 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 113 |
"section": "3.5 102: Simple VLM Query Example",
|
| 114 |
-
"text": "3.5 102: Simple VLM Query Example\nTo get started let’s do a simple query to describe an image from the dataset.\n\nimage = ds[0][\"image\"]\n\n# Query the VLM to describe the image\ndescription = query_image(image, \"Describe this image.\", model='
|
| 115 |
"crumbs": [
|
| 116 |
"Structured Information Extraction",
|
| 117 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
@@ -122,7 +166,7 @@
|
|
| 122 |
"href": "patterns/structured-generation/vlm-structured-generation.html#classification",
|
| 123 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 124 |
"section": "3.6 Classification",
|
| 125 |
-
"text": "3.6 Classification\n\nWe’ll define a fairly simple prompt that asks the VLM to decide if a page is one of three categories. We describe each of these categopries and then ask the model to only return one of these as the output. We’ll do this for ten examples and we’ll also log how long it’s taking.\n\nimport time\nfrom tqdm.auto import tqdm\n\nsample_size = 10\n\nsample = ds.take(sample_size)\n\nprompt = \"\"\"Classify this image into one of the following categories:\n\n1. **Index/Reference Card**: A library catalog or reference card\n\n2. **Manuscript Page**: A handwritten or historical document page\n\n3. **Other**: Any document that doesn't fit the above categories\n\nExamine the overall structure, layout, and content type to determine the classification. Focus on whether the document is a structured catalog/reference tool (Index Card) or a historical manuscript with continuous text (Manuscript Page).\n\nReturn only the category name: \"Index/Reference Card\", \"Manuscript Page\", or \"Other\"\n\"\"\"\n\nresults = []\n# Time the execution using standard Python\nstart_time = time.time()\nfor row in tqdm(sample):\n image = row['image']\n results.append(query_image(image, prompt))\nelapsed_time = time.time() - start_time\nprint(f\"Execution time: {elapsed_time:.2f} seconds\")\nrprint(results)\n\n\n\n\nExecution time:
|
| 126 |
"crumbs": [
|
| 127 |
"Structured Information Extraction",
|
| 128 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
@@ -133,142 +177,21 @@
|
|
| 133 |
"href": "patterns/structured-generation/vlm-structured-generation.html#beyond-classifying---extracting-structured-information",
|
| 134 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 135 |
"section": "3.7 Beyond classifying - Extracting structured information",
|
| 136 |
-
"text": "3.7 Beyond classifying - Extracting structured information\nSo far we’ve focused on classifying images but what if we want to extract information from the images? Let’s take the first example from the dataset again.\n\nindex_image = ds[0]['image']\nindex_image\n\n\n\n\n\n\n\n\nIf we have an image like this we don’t just want to assign a label from it (we may do this as a first step) we actually want to extract the various fields from the card in a structured way. We can again use a Pydantic model to define the structure of the data we want to extract.\n\nfrom pydantic import BaseModel, Field\nfrom typing import Optional\n\n\nclass BritishLibraryReprographicCard(BaseModel):\n \"\"\"\n Pydantic model for extracting information from British Library Reference Division \n reprographic cards used to document manuscripts and other materials.\n \"\"\"\n \n department: str = Field(\n ..., \n description=\"The division that holds the material (e.g., 'MANUSCRIPTS')\"\n )\n \n shelfmark: str = Field(\n ..., \n description=\"The library's classification/location code (e.g., 'SLOANE 3972.C. (VOL 1)')\"\n )\n \n order: str = Field(\n ..., \n description=\"Order reference, typically starting with 'SCH NO' followed by numbers\"\n )\n \n author: Optional[str] = Field(\n None, \n description=\"Author name if present, null if blank or marked with diagonal line\"\n )\n \n title: str = Field(\n ..., \n description=\"The name of the work or manuscript\"\n )\n \n place_and_date_of_publication: Optional[str] = Field(\n None, \n description=\"Place and date of publication if present, null if blank\"\n )\n \n reduction: int = Field(\n ..., \n description=\"The reduction number shown at the bottom of the card\"\n )\n\nWe’ll now create a function to handle the querying process using this structured schema.\n\ndef query_image_structured(image, prompt, schema, model='
|
| 137 |
"crumbs": [
|
| 138 |
"Structured Information Extraction",
|
| 139 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
| 140 |
]
|
| 141 |
},
|
| 142 |
{
|
| 143 |
-
"objectID": "patterns/structured-generation/
|
| 144 |
-
"href": "patterns/structured-generation/
|
| 145 |
-
"title": "
|
| 146 |
-
"section": "",
|
| 147 |
-
"text": "
|
| 148 |
-
"crumbs": [
|
| 149 |
-
"Structured Information Extraction",
|
| 150 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 151 |
-
]
|
| 152 |
-
},
|
| 153 |
-
{
|
| 154 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#introduction",
|
| 155 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#introduction",
|
| 156 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 157 |
-
"section": "",
|
| 158 |
-
"text": "Designing schemas for real catalog requirements\nRunning extractions at scale\nEvaluating extraction quality - different strategies for assessing accuracy\nHandling edge cases and failures",
|
| 159 |
-
"crumbs": [
|
| 160 |
-
"Structured Information Extraction",
|
| 161 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 162 |
-
]
|
| 163 |
-
},
|
| 164 |
-
{
|
| 165 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#the-task-advisor-index-cards",
|
| 166 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#the-task-advisor-index-cards",
|
| 167 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 168 |
-
"section": "4.2 The Task: Advisor Index Cards",
|
| 169 |
-
"text": "4.2 The Task: Advisor Index Cards\nThe National Library of Scotland has a collection of historical index cards documenting manuscripts and correspondence. Each card follows a fairly consistent format:\n\nSurname: Family name\nForenames: Given names\nEpithet: Role, title, or occupation\nMS no: Manuscript reference number\nDescription: Document type and date\nFolios: Page references\n\nThe goal is to extract this structured information to enable: - Searchable digital catalog - Integration with library management systems - Research access to historical collections\n\n4.2.1 Example Cards\nLet’s look at a few sample cards from the collection:\n\nfrom pathlib import Path\nimport matplotlib.pyplot as plt\n\nimages = list(Path(\"../../assets/vllm-structured-generation/indexes/\").rglob(\"*.JPG\"))\nimages\n\n[PosixPath('../../assets/vllm-structured-generation/indexes/DSC00172.JPG'),\n PosixPath('../../assets/vllm-structured-generation/indexes/DSC00173.JPG'),\n PosixPath('../../assets/vllm-structured-generation/indexes/DSC00171.JPG'),\n PosixPath('../../assets/vllm-structured-generation/indexes/DSC00170.JPG'),\n PosixPath('../../assets/vllm-structured-generation/indexes/DSC00169.JPG'),\n PosixPath('../../assets/vllm-structured-generation/indexes/DSC00168.JPG')]\n\n\n\n# display a grid of images using matplotlib (len of images)\nnumber_of_images = len(images)\ncols = 3\nrows = (number_of_images + cols - 1) // cols\nfig, axs = plt.subplots(rows, cols, figsize=(15, 5 * rows))\nfor i, img_path in enumerate(images):\n img = plt.imread(img_path)\n ax = axs[i // cols, i % cols] if rows > 1 else axs[i % cols]\n ax.imshow(img)\n ax.axis('off')\n ax.set_title(img_path.stem)\nplt.tight_layout()\nplt.show()",
|
| 170 |
-
"crumbs": [
|
| 171 |
-
"Structured Information Extraction",
|
| 172 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 173 |
-
]
|
| 174 |
-
},
|
| 175 |
-
{
|
| 176 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#schema-design",
|
| 177 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#schema-design",
|
| 178 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 179 |
-
"section": "4.3 Schema Design",
|
| 180 |
-
"text": "4.3 Schema Design\nWorking with the library curators, we designed a schema that matches their cataloging requirements. The schema is intentionally simple - complex schemas are harder for VLMs to extract reliably.\nThis schema is something we can iterate on later based on extraction quality but gives us a solid starting point.\n\nfrom pydantic import BaseModel, Field\nfrom typing import Optional\n\nclass IndexCardEntry(BaseModel):\n \"\"\"Schema for index card extraction matching curator specification\"\"\"\n \n surname: str = Field(..., description=\"Family name as written on card\")\n forenames: Optional[str] = Field(None, description=\"Given names\")\n epithet: Optional[str] = Field(None, description=\"Title, occupation, or role\")\n ms_no: str = Field(..., description=\"Manuscript number\")\n description: str = Field(..., description=\"Document description with date\")\n folios: str = Field(..., description=\"Folio reference\")\n \n failed_to_parse: bool = Field(\n False,\n description=\"Set to True if the card cannot be reliably extracted (illegible, damaged, etc.)\"\n )\n notes: Optional[str] = Field(\n None, \n description=\"Optional notes about the card: handwritten annotations, ambiguities, \"\n \"corrections, or reasons for failed parsing.\"\n )\n\n\nLet’s take a look at the schema definition we’ll use for extraction:\n\n# Display the schema\nfrom rich import print\nprint(IndexCardEntry.model_json_schema())\n\n{\n 'description': 'Schema for index card extraction matching curator specification',\n 'properties': {\n 'surname': {'description': 'Family name as written on card', 'title': 'Surname', 'type': 'string'},\n 'forenames': {\n 'anyOf': [{'type': 'string'}, {'type': 'null'}],\n 'default': None,\n 'description': 'Given names',\n 'title': 'Forenames'\n },\n 'epithet': {\n 'anyOf': [{'type': 'string'}, {'type': 'null'}],\n 'default': None,\n 'description': 'Title, occupation, or role',\n 'title': 'Epithet'\n },\n 'ms_no': {'description': 'Manuscript number', 'title': 'Ms No', 'type': 'string'},\n 'description': {\n 'description': 'Document description with date',\n 'title': 'Description',\n 'type': 'string'\n },\n 'folios': {'description': 'Folio reference', 'title': 'Folios', 'type': 'string'},\n 'failed_to_parse': {\n 'default': False,\n 'description': 'Set to True if the card cannot be reliably extracted (illegible, damaged, etc.)',\n 'title': 'Failed To Parse',\n 'type': 'boolean'\n },\n 'notes': {\n 'anyOf': [{'type': 'string'}, {'type': 'null'}],\n 'default': None,\n 'description': 'Optional notes about the card: handwritten annotations, ambiguities, corrections, or \nreasons for failed parsing.',\n 'title': 'Notes'\n }\n },\n 'required': ['surname', 'ms_no', 'description', 'folios'],\n 'title': 'IndexCardEntry',\n 'type': 'object'\n}",
|
| 181 |
-
"crumbs": [
|
| 182 |
-
"Structured Information Extraction",
|
| 183 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 184 |
-
]
|
| 185 |
-
},
|
| 186 |
-
{
|
| 187 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#setup",
|
| 188 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#setup",
|
| 189 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 190 |
-
"section": "4.4 Setup",
|
| 191 |
-
"text": "4.4 Setup\nWe’ll reuse the VLM setup from the previous chapter. If you haven’t already, make sure LM Studio is running with a VLM loaded.\n\nfrom openai import OpenAI\nimport base64\nfrom io import BytesIO\nfrom PIL import Image as PILImage\n\n\nclient = OpenAI(\n base_url=\"http://localhost:1234/v1\",\n api_key=\"lm-studio\"\n)\n\n\nclient.models.list() \n\nSyncPage[Model](data=[Model(id='qwen3-vl-2b-instruct-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-vl-8b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-vl-4b', created=None, object='model', owned_by='organization_owner'), Model(id='text-embedding-nomic-embed-text-v1.5', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-thinking@4bit', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-vl-30b-a3b-thinking@3bit', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen3-4b-thinking-2507', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3-12b', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3-4b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen2-0.5b-instruct-fingreylit', created=None, object='model', owned_by='organization_owner'), Model(id='google/gemma-3n-e4b', created=None, object='model', owned_by='organization_owner'), Model(id='granite-vision-3.3-2b', created=None, object='model', owned_by='organization_owner'), Model(id='ibm/granite-4-h-tiny', created=None, object='model', owned_by='organization_owner'), Model(id='iconclass-vlm', created=None, object='model', owned_by='organization_owner'), Model(id='mlx-community/qwen2.5-vl-3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='lmstudio-community/qwen2.5-vl-3b-instruct', created=None, object='model', owned_by='organization_owner'), Model(id='lfm2-vl-1.6b', created=None, object='model', owned_by='organization_owner'), Model(id='mimo-vl-7b-rl-2508@q4_k_s', created=None, object='model', owned_by='organization_owner'), Model(id='mimo-vl-7b-rl-2508@q8_0', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-30b-a3b-instruct-2507', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-4b-instruct-2507-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='openai/gpt-oss-20b', created=None, object='model', owned_by='organization_owner'), Model(id='qwen/qwen2.5-vl-7b', created=None, object='model', owned_by='organization_owner'), Model(id='mistralai/mistral-small-3.2', created=None, object='model', owned_by='organization_owner'), Model(id='qwen3-30b-a3b-instruct-2507-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='liquid/lfm2-1.2b', created=None, object='model', owned_by='organization_owner'), Model(id='smollm3-3b-mlx', created=None, object='model', owned_by='organization_owner'), Model(id='unsloth/smollm3-3b', created=None, object='model', owned_by='organization_owner'), Model(id='ggml-org/smollm3-3b', created=None, object='model', owned_by='organization_owner'), Model(id='mlx-community/smollm3-3b', created=None, object='model', owned_by='organization_owner')], object='list')\n\n\n\nfrom typing import Union\ndef query_image_structured(image: Union[PILImage.Image, str], prompt: 
str, schema: BaseModel, model='qwen/qwen3-vl-4b'):\n \"\"\"\n Query VLM with an image and get structured output based on a Pydantic schema.\n \n Args:\n image: PIL Image or file path to the image\n prompt: Text prompt describing what to extract\n schema: Pydantic model class defining the expected output structure\n model: Model ID to use for the query\n \n Returns:\n Parsed Pydantic model instance with the extracted data\n \"\"\"\n # Convert image to base64\n if isinstance(image, PILImage.Image):\n buffered = BytesIO()\n image.save(buffered, format=\"JPEG\")\n image_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')\n else:\n with open(image, \"rb\") as f:\n image_base64 = base64.b64encode(f.read()).decode('utf-8')\n \n # Query with structured output\n completion = client.beta.chat.completions.parse(\n model=model,\n messages=[{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"text\", \"text\": prompt},\n {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_base64}\"}}\n ]\n }],\n response_format=schema,\n temperature=0.3 # Lower temperature for more consistent extraction\n )\n \n # Return the parsed structured data\n return completion.choices[0].message.parsed",
|
| 192 |
-
"crumbs": [
|
| 193 |
-
"Structured Information Extraction",
|
| 194 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 195 |
-
]
|
| 196 |
-
},
|
| 197 |
-
{
|
| 198 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#extraction-examples",
|
| 199 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#extraction-examples",
|
| 200 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 201 |
-
"section": "4.5 Extraction Examples",
|
| 202 |
-
"text": "4.5 Extraction Examples\nLet’s run extraction on several sample cards to see how the model performs.\n\nprompt = \"\"\"Extract structured information from this historical library index card and return it as JSON.\n\n This is an index card from the National Library of Scotland's Advocate's Library collection. Each card documents a person and associated manuscript references.\n\n Return a JSON object with these exact fields:\n\n {\n \"surname\": \"Family name exactly as typed (e.g., 'ABAD', 'ABARACA Y BOLEA')\",\n \"forenames\": \"Given names (e.g., 'Joseph', 'Thomas') or null if not present\",\n \"epithet\": \"Title, occupation, or role (e.g., 'Captain, Spanish Army') or null if not present\",\n \"ms_no\": \"Manuscript number exactly as written (e.g., '5538', '5529')\",\n \"description\": \"Document description with date (e.g., 'letter of (1783)', 'copy of petition of (ca. 1783)')\",\n \"folios\": \"Folio reference exactly as written (e.g., 'f.11', 'f.169')\",\n \"failed_to_parse\": false (or true if card is illegible/severely damaged),\n \"notes\": \"Optional notes about handwritten corrections, ambiguities, or parsing issues\"\n }\n\n Guidelines:\n - Extract text exactly as it appears - do not correct spelling or expand abbreviations\n - Preserve original punctuation and formatting\n - If a field is unclear but you can make a reasonable inference, extract it and note the ambiguity in \"notes\"\n - Only set \"failed_to_parse\" to true if you genuinely cannot extract the required fields\n - Use null for optional fields (forenames, epithet, notes) if they are not present or marked with a line\"\"\"\n\n\nimage = PILImage.open(images[0])\nimage \n\n\n\n\n\n\n\n\n\nfrom rich import print\nresult = query_image_structured(image, prompt, IndexCardEntry, model='qwen/qwen3-vl-4b')\n \n\n\nprint(result)\n\nIndexCardEntry(\n surname='ABBAATE',\n forenames='Itala',\n epithet='Daughter of the Physician',\n ms_no='2633',\n description='letter of (1878)',\n folios='f. 38',\n failed_to_parse=False,\n notes=\"Handwritten corrections and annotations present: 'Cairo' (instead of 'ABBAATE'), 'Cairo' (instead of \n'ABBAATE'), 'Physician' (instead of 'Physician'), '2633' (instead of '2633'), 'f. 38' (instead of 'f. 38'). Also, \n'Cairo' appears to be a scribbled correction or miswriting of 'ABBAATE'.\"\n)\n\n\n\n\n4.5.1 Comparing Extraction to Ground Truth\nLet’s compare a few extractions to the actual card content:\n\nfrom tqdm.auto import tqdm\n\nresults = []\nfor img_path in tqdm(images):\n image = PILImage.open(img_path)\n result = query_image_structured(image, prompt, IndexCardEntry, model='qwen/qwen3-vl-4b')\n results.append((img_path.stem, result))",
|
| 203 |
-
"crumbs": [
|
| 204 |
-
"Structured Information Extraction",
|
| 205 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 206 |
-
]
|
| 207 |
-
},
|
| 208 |
-
{
|
| 209 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#evaluation-strategies",
|
| 210 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#evaluation-strategies",
|
| 211 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 212 |
-
"section": "4.6 Evaluation Strategies",
|
| 213 |
-
"text": "4.6 Evaluation Strategies\nHow do we know if the extraction is working well? There are several approaches to evaluation, each with different tradeoffs.\n\n4.6.1 Looking at lots of samples\nIt sounds simple, but looking at a large number of random samples can give a good sense of overall quality. You can spot common errors and get a feel for how reliable the extraction is. You can quickly build intuition about what might be going wrong and where to focus improvement efforts. Realistically you will usually spend some time iterating on the prompt and schema at this stage. Looking at more than one example is important to avoid overfitting to a single case but you don’t immediately need to look at hundreds of examples or set up complex metrics or evaluations. This can come later.\n\nfor i, (img_stem, result) in enumerate(results):\n fig, (ax_img, ax_text) = plt.subplots(1, 2, figsize=(16, 6), \n gridspec_kw={'width_ratios': [1, 1]})\n\n # Left: Display image\n img = plt.imread(images[i])\n ax_img.imshow(img)\n ax_img.axis('off')\n ax_img.set_title(f\"Card {i+1}: {img_stem}\", fontsize=14, fontweight='bold')\n\n # Right: Display extracted data as formatted text\n ax_text.axis('off')\n\n # Format the extracted data nicely\n text_lines = [\n \"Extracted Data:\",\n \"\",\n f\"Surname: {result.surname}\",\n f\"Forenames: {result.forenames or 'N/A'}\",\n f\"Epithet: {result.epithet or 'N/A'}\",\n f\"MS No: {result.ms_no}\",\n f\"Description: {result.description}\",\n f\"Folios: {result.folios}\",\n \"\",\n f\"Failed to Parse: {result.failed_to_parse}\",\n ]\n\n # Add notes if present\n if result.notes:\n text_lines.extend((\"\", \"Notes:\"))\n # Wrap long notes\n import textwrap\n wrapped_notes = textwrap.fill(result.notes, width=60)\n text_lines.append(wrapped_notes)\n\n # Join and display\n formatted_text = \"\\n\".join(text_lines)\n ax_text.text(0.05, 0.95, formatted_text, \n transform=ax_text.transAxes,\n fontsize=11,\n verticalalignment='top',\n fontfamily='monospace',\n bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))\n\n plt.tight_layout()\n plt.show()\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n4.6.1.1 What we learned from these samples\n\nIt seems that in these examples the notes field isn’t really adding much value and potentially it just adds noise.\nWhile the failed_to_parse flag sounds useful, we may want to rely on other approaches to identify failures since the model may not always set this flag correctly (and in this case we probably have some other ways to identify failures like looking for missing critical fields).\nOverall, we should prioritize extracting the most relevant information and avoid including fields that do not contribute to the understanding of the index card content. The simpler the schema the less for us to have to check and the fewer tokens the model has to generate. 
When we’re testing with small batches it doesn’t seem so important but when scaling to thousands of cards it can make a bigger difference.\n\n\nfrom pydantic import BaseModel, Field\nfrom typing import Optional\n\nclass IndexCardEntry(BaseModel):\n \"\"\"Schema for index card extraction matching curator specification\"\"\"\n \n surname: str = Field(..., description=\"Family name as written on card\")\n forenames: Optional[str] = Field(None, description=\"Given names\")\n epithet: Optional[str] = Field(None, description=\"Title, occupation, or role\")\n ms_no: str = Field(..., description=\"Manuscript number\")\n description: str = Field(..., description=\"Document description with date\")\n folios: str = Field(..., description=\"Folio reference\")\n \n\nprompt = \"\"\"Extract structured information from this historical library index card and return it as JSON.\n\n This is an index card from the National Library of Scotland's Advocate's Library collection. Each card documents a person and associated manuscript references.\n\n Return a JSON object with these exact fields:\n\n {\n \"surname\": \"Family name exactly as typed (e.g., 'ABAD', 'ABARACA Y BOLEA')\",\n \"forenames\": \"Given names (e.g., 'Joseph', 'Thomas') or null if not present\",\n \"epithet\": \"Title, occupation, or role (e.g., 'Captain, Spanish Army') or null if not present\",\n \"ms_no\": \"Manuscript number exactly as written (e.g., '5538', '5529')\",\n \"description\": \"Document description with date (e.g., 'letter of (1783)', 'copy of petition of (ca. 1783)')\",\n \"folios\": \"Folio reference exactly as written (e.g., 'f.11', 'f.169')\",\n }\n\n Guidelines:\n - Extract text exactly as it appears - do not correct spelling or expand abbreviations\n - Preserve original punctuation and formatting\n - Use null for optional fields (forenames, epithet, notes) if they are not present or marked with a line\"\"\"\n\n\nresults = []\nfor img_path in tqdm(images):\n image = PILImage.open(img_path)\n result = query_image_structured(image, prompt, IndexCardEntry, model='qwen/qwen3-vl-8b')\n results.append((img_path.stem, result))\n\n\n\n\n\n# Display images with extracted data side-by-side\n# Two columns: left = image, right = extracted text\n\nfor i, (img_stem, result) in enumerate(results):\n fig, (ax_img, ax_text) = plt.subplots(1, 2, figsize=(16, 6), \n gridspec_kw={'width_ratios': [1, 1]})\n \n # Left: Display image\n img = plt.imread(images[i])\n ax_img.imshow(img)\n ax_img.axis('off')\n ax_img.set_title(f\"Card {i+1}: {img_stem}\", fontsize=14, fontweight='bold')\n \n # Right: Display extracted data as formatted text\n ax_text.axis('off')\n \n # Format the extracted data nicely\n text_lines = [\n \"Extracted Data:\",\n \"\",\n f\"Surname: {result.surname}\",\n f\"Forenames: {result.forenames or 'N/A'}\",\n f\"Epithet: {result.epithet or 'N/A'}\",\n f\"MS No: {result.ms_no}\",\n f\"Description: {result.description}\",\n f\"Folios: {result.folios}\",\n \"\",\n ]\n # Join and display\n formatted_text = \"\\n\".join(text_lines)\n ax_text.text(0.05, 0.95, formatted_text, \n transform=ax_text.transAxes,\n fontsize=11,\n verticalalignment='top',\n fontfamily='monospace',\n bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.3))\n \n plt.tight_layout()\n plt.show()\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n4.6.2 1. 
Manual Ground Truth Evaluation\nThe Gold Standard: Manually annotate a sample of cards and compare.\nPros: - Most accurate measure of performance - Catches all types of errors - Builds training data for future improvements\nCons: - Time consuming - Requires expert annotators - Limited sample size\nBest for: Final validation, establishing baselines, understanding failure modes\n\n# TODO: Load manually annotated ground truth\n# Compare predictions to ground truth\n# Calculate field-level accuracy\n\n# Example metrics:\n# - Exact match accuracy per field\n# - Character error rate\n# - Common error patterns\n\n\n\n4.6.3 2. Cross-Model Evaluation (Model-as-Judge)\nThe Pragmatic Approach: Use a stronger/different model to evaluate outputs.\nPros: - Much faster than manual annotation - Can evaluate full dataset - Good for catching obvious errors\nCons: - Requires access to multiple models - May miss subtle errors - Judge model can be wrong too\nBest for: Large-scale quality monitoring, automated testing, identifying problem areas for manual review\n\n# TODO: Implement model-as-judge evaluation\n# - Extract with Model A (e.g., local Qwen)\n# - Show image + extraction to Model B (e.g., Claude/GPT-4)\n# - Ask Model B to rate accuracy and identify errors\n# - Aggregate results\n\n# Example judge prompt:\n# \"\"\"\n# Compare this extracted data to the index card image:\n# [extraction]\n# \n# For each field, rate accuracy:\n# - Correct: Field matches card exactly\n# - Minor error: Small typo or formatting difference\n# - Major error: Wrong information\n# - Missing: Field is on card but not extracted\n# \"\"\"\n\n\n\n4.6.4 3. Internal Consistency Checks\nThe Automated Approach: Use business rules and patterns to identify suspicious outputs.\nExamples: - Manuscript numbers should follow known patterns - Dates should be within expected ranges - Folio references have consistent formats - Certain fields should always be present\nPros: - Completely automated - Fast - can run on full dataset - No additional model costs\nCons: - Only catches specific error types - Requires domain knowledge to design rules - Can miss errors that follow valid patterns\nBest for: Flagging outliers for review, automated quality gates, monitoring production systems\n\n# TODO: Implement consistency checks\n\n# def validate_extraction(entry: IndexCardEntry) -> list[str]:\n# \"\"\"Run validation checks and return list of warnings.\"\"\"\n# warnings = []\n# \n# # Check MS number format\n# if not re.match(r'^\\d+', entry.ms_no):\n# warnings.append(f\"Unusual MS number format: {entry.ms_no}\")\n# \n# # Check for dates in expected range\n# dates = re.findall(r'\\d{4}', entry.description)\n# for date in dates:\n# if not (1500 <= int(date) <= 1950):\n# warnings.append(f\"Date outside expected range: {date}\")\n# \n# # Check folio format\n# if not re.match(r'^f+\\.?\\s*\\d+', entry.folios, re.IGNORECASE):\n# warnings.append(f\"Unusual folio format: {entry.folios}\")\n# \n# return warnings\n\n\n\n4.6.5 4. Confidence Scoring\nMany VLM APIs return confidence scores or logprobs. 
We can use these to identify uncertain extractions.\nPros: - No additional cost or models needed - Can prioritize review efforts - Helps establish quality thresholds\nCons: - Not all models/APIs provide confidence scores - High confidence doesn’t guarantee correctness - Requires calibration\nBest for: Prioritizing manual review, quality-based routing, understanding model uncertainty\n\n# TODO: If available, extract and analyze confidence scores\n# Plot distribution of confidence scores\n# Correlate confidence with manual evaluation results\n\n\n\n4.6.6 Combining Evaluation Approaches\nIn practice, a robust evaluation strategy uses multiple approaches:\n\nStart with manual ground truth on a small sample (~50-100 cards) to establish baseline accuracy\nUse consistency checks to automatically flag suspicious outputs\nApply model-as-judge on a larger sample to monitor quality\nPrioritize review using confidence scores or validation warnings\nContinuous monitoring as you process the full collection\n\nThis gives you both rigorous accuracy metrics and practical quality assurance at scale.",
|
| 214 |
-
"crumbs": [
|
| 215 |
-
"Structured Information Extraction",
|
| 216 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 217 |
-
]
|
| 218 |
-
},
|
| 219 |
-
{
|
| 220 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#batch-processing",
|
| 221 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#batch-processing",
|
| 222 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 223 |
-
"section": "4.7 Batch Processing",
|
| 224 |
-
"text": "4.7 Batch Processing\nNow let’s process a larger batch of cards and analyze the results.\n\n# TODO: Process all available cards\n# Track timing, failures, warnings\n# Save results to file\n\n\n4.7.1 Results Analysis\n\n# TODO: Analyze batch results\n# - Success rate\n# - Failed to parse rate\n# - Validation warnings distribution\n# - Processing time statistics",
|
| 225 |
-
"crumbs": [
|
| 226 |
-
"Structured Information Extraction",
|
| 227 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 228 |
-
]
|
| 229 |
-
},
|
| 230 |
-
{
|
| 231 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#edge-cases-and-failure-modes",
|
| 232 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#edge-cases-and-failure-modes",
|
| 233 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 234 |
-
"section": "4.8 Edge Cases and Failure Modes",
|
| 235 |
-
"text": "4.8 Edge Cases and Failure Modes\nWhat kinds of cards are hard for the model to process?\n\n# TODO: Examine failed/problematic extractions\n# Common patterns:\n# - Handwritten corrections/additions\n# - Faded or damaged cards\n# - Unusual formats or layouts\n# - Multiple entries per card",
|
| 236 |
-
"crumbs": [
|
| 237 |
-
"Structured Information Extraction",
|
| 238 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 239 |
-
]
|
| 240 |
-
},
|
| 241 |
-
{
|
| 242 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#export-for-cataloging",
|
| 243 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#export-for-cataloging",
|
| 244 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 245 |
-
"section": "4.9 Export for Cataloging",
|
| 246 |
-
"text": "4.9 Export for Cataloging\nConvert the extracted data to formats suitable for library systems.\n\n# TODO: Export to CSV/JSON/XML\n# Consider catalog system requirements (MARC, Dublin Core, etc.)",
|
| 247 |
-
"crumbs": [
|
| 248 |
-
"Structured Information Extraction",
|
| 249 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 250 |
-
]
|
| 251 |
-
},
|
| 252 |
-
{
|
| 253 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#next-steps",
|
| 254 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#next-steps",
|
| 255 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 256 |
-
"section": "4.10 Next Steps",
|
| 257 |
-
"text": "4.10 Next Steps\nThis notebook demonstrates the core extraction and evaluation workflow. For production deployment, you would need:\n\nRobust error handling - retry logic, fallbacks, logging\nQuality assurance workflow - human review interface for flagged items\nBatch processing infrastructure - queue management, progress tracking\nModel optimization - prompt tuning, model selection, cost optimization\n\nThese production considerations are covered in the appendices and separate infrastructure documentation.",
|
| 258 |
-
"crumbs": [
|
| 259 |
-
"Structured Information Extraction",
|
| 260 |
-
"<span class='chapter-number'>4</span> <span class='chapter-title'>Practical Application: Advisor Index Card Extraction</span>"
|
| 261 |
-
]
|
| 262 |
-
},
|
| 263 |
-
{
|
| 264 |
-
"objectID": "patterns/structured-generation/advisor-index-cards.html#key-takeaways",
|
| 265 |
-
"href": "patterns/structured-generation/advisor-index-cards.html#key-takeaways",
|
| 266 |
-
"title": "4 Practical Application: Advisor Index Card Extraction",
|
| 267 |
-
"section": "4.11 Key Takeaways",
|
| 268 |
-
"text": "4.11 Key Takeaways\n\nSimple schemas work better - Don’t over-engineer the structure\nMultiple evaluation strategies - Combine automated and manual approaches\nPlan for failure - Build in quality flags and review workflows\nDomain expertise matters - Work closely with catalogers to define requirements\nIterate based on results - Start small, evaluate, adjust, scale",
|
| 269 |
"crumbs": [
|
| 270 |
"Structured Information Extraction",
|
| 271 |
-
"<span class='chapter-number'>
|
| 272 |
]
|
| 273 |
}
|
| 274 |
]
|
|
|
|
| 29 |
"Welcome"
|
| 30 |
]
|
| 31 |
},
|
| 32 |
+
{
|
| 33 |
+
"objectID": "patterns/what-is-an-ai-pattern.html",
|
| 34 |
+
"href": "patterns/what-is-an-ai-pattern.html",
|
| 35 |
+
"title": "1 What is an AI Pattern?",
|
| 36 |
+
"section": "",
|
| 37 |
+
"text": "1.1 Why Patterns?\nA pattern is a reusable solution to a commonly occurring problem. The concept comes from architecture—Christopher Alexander’s work on design patterns—and was later adopted by software engineering. In this book, we apply the same idea to AI implementations in GLAM contexts.\nAI and machine learning are evolving rapidly. The models, APIs, and frameworks we use today will be superseded—often within months. But the underlying problems—extracting structured data from historical documents, assessing condition at scale, making collections discoverable—persist.\nPatterns help us in three ways:\nThey’re technology-agnostic. A pattern describes what problem you’re solving and why an approach works, not just which model to use. When better models emerge, the pattern still applies.\nThey’re communicable. Patterns give teams a shared vocabulary. Saying “we’re using a structured extraction pattern” conveys more than listing the specific models and APIs involved.\nThey’re adaptable. The same pattern can be implemented differently depending on your constraints—budget, infrastructure, staff expertise, risk tolerance.",
|
| 38 |
+
"crumbs": [
|
| 39 |
+
"Design Patterns",
|
| 40 |
+
"<span class='chapter-number'>1</span> <span class='chapter-title'>What is an AI Pattern?</span>"
|
| 41 |
+
]
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"objectID": "patterns/what-is-an-ai-pattern.html#anatomy-of-a-pattern",
|
| 45 |
+
"href": "patterns/what-is-an-ai-pattern.html#anatomy-of-a-pattern",
|
| 46 |
+
"title": "1 What is an AI Pattern?",
|
| 47 |
+
"section": "1.2 Anatomy of a Pattern",
|
| 48 |
+
"text": "1.2 Anatomy of a Pattern\nEach pattern in this book follows a consistent structure:\nThe Challenge What recurring problem does this pattern address? What makes it difficult or impossible to solve with traditional approaches?\nSolution Overview The high-level approach. What makes this work? What are the key components?\nImplementation Technical walkthrough with working code. We use real examples from GLAM collections, not toy datasets.\nConsiderations When should you use this pattern? What are the tradeoffs? What might go wrong?",
|
| 49 |
+
"crumbs": [
|
| 50 |
+
"Design Patterns",
|
| 51 |
+
"<span class='chapter-number'>1</span> <span class='chapter-title'>What is an AI Pattern?</span>"
|
| 52 |
+
]
|
| 53 |
+
},
|
| 54 |
+
{
|
| 55 |
+
"objectID": "patterns/what-is-an-ai-pattern.html#patterns-in-this-book",
|
| 56 |
+
"href": "patterns/what-is-an-ai-pattern.html#patterns-in-this-book",
|
| 57 |
+
"title": "1 What is an AI Pattern?",
|
| 58 |
+
"section": "1.3 Patterns in This Book",
|
| 59 |
+
"text": "1.3 Patterns in This Book\nThis book currently covers:\n\nStructured Information Extraction — Using Vision Language Models to extract structured metadata from document images (index cards, forms, registers)\n\nAdditional patterns will be added as the book develops.",
|
| 60 |
+
"crumbs": [
|
| 61 |
+
"Design Patterns",
|
| 62 |
+
"<span class='chapter-number'>1</span> <span class='chapter-title'>What is an AI Pattern?</span>"
|
| 63 |
+
]
|
| 64 |
+
},
|
| 65 |
{
|
| 66 |
"objectID": "patterns/structured-generation/intro.html",
|
| 67 |
"href": "patterns/structured-generation/intro.html",
|
| 68 |
+
"title": "2 Structured Document Processing",
|
| 69 |
"section": "",
|
| 70 |
+
"text": "2.1 The Challenge\nMany GLAM institutions have vast collections of structured documents—index cards, forms, registers—containing valuable information locked in physical or image formats. Manual transcription doesn’t scale, but the structured nature of these documents makes them ideal candidates for AI-powered processing.\nUnlocking this data means better discovery, new research possibilities, and integration with modern cataloguing systems.",
|
| 71 |
"crumbs": [
|
| 72 |
"Structured Information Extraction",
|
| 73 |
+
"<span class='chapter-number'>2</span> <span class='chapter-title'>Structured Document Processing</span>"
|
| 74 |
]
|
| 75 |
},
|
| 76 |
{
|
| 77 |
"objectID": "patterns/structured-generation/intro.html#dont-we-just-need-ocr",
|
| 78 |
"href": "patterns/structured-generation/intro.html#dont-we-just-need-ocr",
|
| 79 |
+
"title": "2 Structured Document Processing",
|
| 80 |
"section": "2.2 Don’t we just need OCR?",
|
| 81 |
+
"text": "2.2 Don’t we just need OCR?\nTraditional OCR extracts text from images, but that’s only half the problem. Consider an index card with a name, date, reference number, and description arranged in specific positions. OCR gives you a block of text—but not which part is the name, which is the date, or how they relate.\nOften, you don’t even need the raw text—you need the information it contains. A catalogue record doesn’t need “Mr. John Smith, 1847” preserved exactly; it needs name: \"John Smith\" and year: 1847 as usable data.\nWith OCR alone, you still need someone to parse text into structured fields. For hundreds of documents, that’s manageable. For hundreds of thousands, it’s not.",
|
| 82 |
"crumbs": [
|
| 83 |
"Structured Information Extraction",
|
| 84 |
+
"<span class='chapter-number'>2</span> <span class='chapter-title'>Structured Document Processing</span>"
|
| 85 |
]
|
| 86 |
},
|
| 87 |
{
|
| 88 |
"objectID": "patterns/structured-generation/intro.html#solution-overview",
|
| 89 |
"href": "patterns/structured-generation/intro.html#solution-overview",
|
| 90 |
+
"title": "2 Structured Document Processing",
|
| 91 |
"section": "2.3 Solution Overview",
|
| 92 |
+
"text": "2.3 Solution Overview\nStructured extraction is a pattern that works across modalities—text, images, audio transcripts. The core idea is the same: constrain a model to return data in a predefined schema rather than freeform text.\nFor document images, we use Vision Language Models (VLMs). Unlike OCR, VLMs understand both visual layout and textual content together. They can see that “1847” appears in the date field position, not just that the characters “1847” exist somewhere on the page.\nStructured output generation constrains the model to return your fields, your format. The result: input in, structured JSON out.\nThis section focuses on the image case—extracting from document images—but the same principles apply when working with text or other formats.\n\n2.3.1 What this pattern looks like\n\n\n\n\n\nflowchart LR\n A[Document Image] --> B[VLM + Schema]\n B --> C[Structured JSON]\n C --> D[Catalogue/Database]\n\n\n\n\n\n\nThe following chapters walk through this in detail—starting with basic VLM queries, then building to real extraction workflows with evaluation strategies.",
|
| 93 |
+
"crumbs": [
|
| 94 |
+
"Structured Information Extraction",
|
| 95 |
+
"<span class='chapter-number'>2</span> <span class='chapter-title'>Structured Document Processing</span>"
|
| 96 |
+
]
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"objectID": "patterns/structured-generation/intro.html#when-to-use-this-pattern",
|
| 100 |
+
"href": "patterns/structured-generation/intro.html#when-to-use-this-pattern",
|
| 101 |
+
"title": "2 Structured Document Processing",
|
| 102 |
+
"section": "2.4 When to Use This Pattern",
|
| 103 |
+
"text": "2.4 When to Use This Pattern\nGood fit:\n\nForms, index cards, registers with consistent layouts\nDocuments where you know what fields you want to extract\nCollections too large for manual transcription\n\nLess suited:\n\nFree-form manuscripts with no predictable structure\nDocuments requiring deep contextual interpretation\nCases where verbatim transcription is the goal (use OCR instead)",
|
| 104 |
"crumbs": [
|
| 105 |
"Structured Information Extraction",
|
| 106 |
+
"<span class='chapter-number'>2</span> <span class='chapter-title'>Structured Document Processing</span>"
|
| 107 |
]
|
| 108 |
},
|
| 109 |
{
|
|
|
|
| 111 |
"href": "patterns/structured-generation/vlm-structured-generation.html",
|
| 112 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 113 |
"section": "",
|
| 114 |
+
"text": "3.1 Introduction\nIn this chapter we’ll start to look at how we can use Visual Language Models (VLMs) to extract structured information from images of documents.\nWe already saw what this looked like at a conceptual level in the previous chapter. In this chapter we’ll get hands on with some code examples to illustrate how this can be done in practice. To start we’ll focus on some relatively simple documents and tasks. This allows us to focus on the core concepts without getting bogged down in too many complexities. We’ll use open source models accessed via the Hugging Face Inference API (you can also run them locally — see the appendix).",
|
| 115 |
"crumbs": [
|
| 116 |
"Structured Information Extraction",
|
| 117 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
|
|
| 133 |
"href": "patterns/structured-generation/vlm-structured-generation.html#setup",
|
| 134 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 135 |
"section": "3.3 Setup",
|
| 136 |
+
"text": "3.3 Setup\n\n3.3.1 Connecting to a Vision Language Model\nWe’ll use Hugging Face Inference Providers to access VLMs via an API. This means we don’t need to install or run any models locally — we just need a free Hugging Face account and an API token.\nSince the Hugging Face Inference API is compatible with the OpenAI Python client, all the code in this chapter will also work with local model servers (like LM Studio, Ollama, or vLLM) with just a one-line change to the client setup. See the appendix at the end of this chapter for details.\n\n\n\n\n\n\nTipGetting a Hugging Face Token\n\n\n\n\nCreate a free account at huggingface.co\nGo to Settings → Access Tokens\nCreate a new token with Read access\nSet it as an environment variable: export HF_TOKEN=hf_... or add it to a .env file\n\n\n\n\nimport os\nfrom openai import OpenAI\nfrom rich import print as rprint\nfrom dotenv import load_dotenv\nload_dotenv()\n\nclient = OpenAI(\n base_url=\"https://router.huggingface.co/v1\",\n api_key=os.environ.get(\"HF_TOKEN\"),\n)\n\nWe’ll use Qwen/Qwen3-VL-8B-Instruct throughout this chapter — an 8 billion parameter vision-language model that offers a good balance of quality and speed for document understanding tasks.",
|
| 137 |
"crumbs": [
|
| 138 |
"Structured Information Extraction",
|
| 139 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
|
|
| 144 |
"href": "patterns/structured-generation/vlm-structured-generation.html#basic-vlm-query",
|
| 145 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 146 |
"section": "3.4 Basic VLM Query",
|
| 147 |
+
"text": "3.4 Basic VLM Query\nLet’s start by defining a simple function that we can use to query a VLM with an image and a prompt. This function will handle converting the image to base64 and sending the request to the model.\nWe’ll default to using the Qwen/Qwen3-VL-8B-Instruct model via HF Inference Providers. You could experiment with different models later on — the code works with any OpenAI-compatible VLM endpoint.\n\n\nCode\nimport base64\nfrom PIL.Image import Image as PILImage\nfrom io import BytesIO\n\ndef query_image(image: str | PILImage, prompt: str, model: str='Qwen/Qwen3-VL-8B-Instruct', max_image_size: int=1024, client=client) -> str:\n \"\"\"Query VLM with an image.\"\"\"\n if isinstance(image, PILImage):\n # Convert PIL Image to bytes and encode to base64\n buffered = BytesIO()\n # ensure image is not too big\n if image.size > (max_image_size, max_image_size):\n image.thumbnail((max_image_size, max_image_size)) \n image.save(buffered, format=\"JPEG\")\n image_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')\n else:\n # Assume image is a file path\n with open(image, \"rb\") as f:\n image_base64 = base64.b64encode(f.read()).decode('utf-8')\n #\n # Query\n response = client.chat.completions.create(\n model=model,\n messages=[{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"text\", \"text\": prompt},\n {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_base64}\"}}\n ]\n }]\n )\n return response.choices[0].message.content",
|
| 148 |
"crumbs": [
|
| 149 |
"Structured Information Extraction",
|
| 150 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
|
|
| 155 |
"href": "patterns/structured-generation/vlm-structured-generation.html#simple-vlm-query-example",
|
| 156 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 157 |
"section": "3.5 102: Simple VLM Query Example",
|
| 158 |
+
"text": "3.5 102: Simple VLM Query Example\nTo get started let’s do a simple query to describe an image from the dataset.\n\nimage = ds[0][\"image\"]\n\n# Query the VLM to describe the image\ndescription = query_image(image, \"Describe this image.\", model='Qwen/Qwen3-VL-8B-Instruct')\nrprint(description)\n\nThis is a black-and-white image of a form from The British Library's Reprographic Section, used to request a copy \nof a manuscript or archival material.\n\nHere is a breakdown of the information on the form:\n\n* **Institution:** The British Library, Reference Division, Reprographic Section.\n* **Address:** Great Russell Street, London WC1B 3DG.\n* **Department:** Manuscripts.\n* **Shelfmark:** SLOANE 3972.C. (Vol. 1)\n* **Order Number:** SCH NO 98876\n* **Author:** This field is blank.\n* **Title:** CATALOGUE OF SIR HANS SLOANES LIBRARY\n* **Place and date of publication:** This field is blank.\n* **Scale/Reduction:** The form indicates a reduction of 12, meaning the reproduction will be scaled down to \n1/12th of the original size. A ruler scale is provided for reference in centimetres and inches.\n\nThe form appears to be filled out by hand for a specific item: Volume 1 of the \"Catalogue of Sir Hans Sloane's \nLibrary,\" which is held in the Manuscripts department under the shelfmark SLOANE 3972.C. This catalogue was \ncompiled by Sir Hans Sloane himself and is a significant historical document detailing his extensive collection, \nwhich later formed part of the foundation of the British Museum and now resides at the British Library.\n\n\n\nWe can see we get a fairly useful description of the card. If we compare against the image we can see most of the details it mentions appear to be largely correct.\n\nimage\n\n\n\n\n\n\n\n\nThere are workflows where open ended description like this could be useful but this isn’t usually the kind of format we want if we want to take some action or do something based on the predictions of the model. In these cases it’s usually nice to have some more controlled output, for example, a label.",
|
| 159 |
"crumbs": [
|
| 160 |
"Structured Information Extraction",
|
| 161 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
|
|
|
| 166 |
"href": "patterns/structured-generation/vlm-structured-generation.html#classification",
|
| 167 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 168 |
"section": "3.6 Classification",
|
| 169 |
+
"text": "3.6 Classification\n\nWe’ll define a fairly simple prompt that asks the VLM to decide if a page is one of three categories. We describe each of these categopries and then ask the model to only return one of these as the output. We’ll do this for ten examples and we’ll also log how long it’s taking.\n\nimport time\nfrom tqdm.auto import tqdm\nfrom rich import print as rprint\n\nsample_size = 10\n\nsample = ds.take(sample_size)\n\nprompt = \"\"\"Classify this image into one of the following categories:\n\n1. **Index/Reference Card**: A library catalog or reference card\n\n2. **Manuscript Page**: A handwritten or historical document page\n\n3. **Other**: Any document that doesn't fit the above categories\n\nExamine the overall structure, layout, and content type to determine the classification. Focus on whether the document is a structured catalog/reference tool (Index Card) or a historical manuscript with continuous text (Manuscript Page).\n\nReturn only the category name: \"Index/Reference Card\", \"Manuscript Page\", or \"Other\"\n\"\"\"\n\nresults = []\n# Time the execution using standard Python\nstart_time = time.time()\nfor row in tqdm(sample):\n image = row['image']\n results.append(query_image(image, prompt, model='Qwen/Qwen3-VL-8B-Instruct'))\nelapsed_time = time.time() - start_time\nprint(f\"Execution time: {elapsed_time:.2f} seconds\")\nrprint(results)\n\n\n\n\nExecution time: 18.31 seconds\n\n\n[\n 'Index/Reference Card',\n 'Other',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page',\n 'Manuscript Page'\n]\n\n\n\nLet’s check the result that was predicted as “index/reference card”\n\nsample[0]['image']\n\n\n\n\n\n\n\n\nWe can extrapolate how long this would take for the full dataset\n\n# Calculate average time per image\navg_time_per_image = elapsed_time / sample_size\n\n# Project time for full dataset\ntotal_images = len(ds)\nprojected_time = avg_time_per_image * total_images\n\nprint(f\"Sample processing time: {elapsed_time:.2f} seconds ({elapsed_time/60:.2f} minutes)\")\nprint(f\"Average time per image: {avg_time_per_image:.2f} seconds\")\nprint(f\"Total images in dataset: {total_images}\")\nprint(f\"Projected time for full dataset: {projected_time/60:.2f} minutes ({projected_time/3600:.2f} hours)\")\n\nSample processing time: 18.31 seconds (0.31 minutes)\nAverage time per image: 1.83 seconds\nTotal images in dataset: 2734\nProjected time for full dataset: 83.45 minutes (1.39 hours)\n\n\n\n3.6.1 Classifying with structured labels\nIn the previous example, we relied on the model to return the label in the correct format. While this often works, it can sometimes lead to inconsistencies in the output. To address this, we can use Pydantic models to define a structured output format. This way, we can ensure that the output adheres to a specific schema.\nIn this example, we’ll define a Pydantic model for our classification task. 
The model will have a single field category which can take one of three literal values \"Index/Reference Card\", \"Manuscript Page\", or \"other\".\nWhat this means in practice is that the model will only be able to return one of these three values for the category field.\n\nfrom pydantic import BaseModel, Field\nfrom typing import Literal\n\nclass PageCategory(BaseModel):\n category: Literal[\"Index/Reference Card\", \"Manuscript Page\", \"other\"] = Field(\n ..., description=\"The category of the image\"\n )\n\nWhen using the OpenAI client we can specify this Pydantic model as the response_format when making the request. This tells the model to return the output in a format that can be parsed into the Pydantic model (the APIs for this are still evolving so may change slightly over time).\n\nbuffered = BytesIO()\nimage.save(buffered, format=\"JPEG\")\nimage_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')\ncompletion = client.beta.chat.completions.parse(\n model=\"Qwen/Qwen3-VL-8B-Instruct\",\n messages=[\n {\n \"role\": \"user\",\n \"content\": [\n {\n \"type\": \"text\",\n \"text\": prompt,\n },\n {\n \"type\": \"image_url\",\n \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_base64}\"},\n },\n ],\n },\n ],\n max_tokens=200,\n temperature=0.7,\n response_format=PageCategory,\n)\nrprint(completion)\nrprint(completion.choices[0].message.parsed)\n\nParsedChatCompletion[PageCategory](\n id='47c1a911111046291f6bbec65ebb1ead',\n choices=[\n ParsedChoice[PageCategory](\n finish_reason='stop',\n index=0,\n logprobs=None,\n message=ParsedChatCompletionMessage[PageCategory](\n content='{\\n \"category\": \"Manuscript Page\"\\n}',\n refusal=None,\n role='assistant',\n annotations=None,\n audio=None,\n function_call=None,\n tool_calls=None,\n parsed=PageCategory(category='Manuscript Page')\n )\n )\n ],\n created=1771255803,\n model='qwen/qwen3-vl-8b-instruct',\n object='chat.completion',\n service_tier=None,\n system_fingerprint='',\n usage=CompletionUsage(\n completion_tokens=13,\n prompt_tokens=966,\n total_tokens=979,\n completion_tokens_details=CompletionTokensDetails(\n accepted_prediction_tokens=0,\n audio_tokens=0,\n reasoning_tokens=0,\n rejected_prediction_tokens=0,\n text_tokens=13,\n image_tokens=0,\n video_tokens=0\n ),\n prompt_tokens_details=PromptTokensDetails(\n audio_tokens=0,\n cached_tokens=0,\n cache_creation_input_tokens=0,\n cache_read_input_tokens=0,\n text_tokens=228,\n image_tokens=738,\n video_tokens=0\n )\n )\n)\n\n\n\nPageCategory(category='Manuscript Page')\n\n\n\n\nimage",
|
| 170 |
"crumbs": [
|
| 171 |
"Structured Information Extraction",
|
| 172 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
| 173 |
]
|
| 174 |
},
|
| 175 |
{
|
| 176 |
"objectID": "patterns/structured-generation/vlm-structured-generation.html#beyond-classifying---extracting-structured-information",
|
| 177 |
"href": "patterns/structured-generation/vlm-structured-generation.html#beyond-classifying---extracting-structured-information",
|
| 178 |
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 179 |
"section": "3.7 Beyond classifying - Extracting structured information",
|
| 180 |
+
"text": "3.7 Beyond classifying - Extracting structured information\nSo far we’ve focused on classifying images but what if we want to extract information from the images? Let’s take the first example from the dataset again.\n\nindex_image = ds[0]['image']\nindex_image\n\n\n\n\n\n\n\n\nIf we have an image like this we don’t just want to assign a label from it (we may do this as a first step) we actually want to extract the various fields from the card in a structured way. We can again use a Pydantic model to define the structure of the data we want to extract.\n\nfrom pydantic import BaseModel, Field\nfrom typing import Optional\n\n\nclass BritishLibraryReprographicCard(BaseModel):\n \"\"\"\n Pydantic model for extracting information from British Library Reference Division \n reprographic cards used to document manuscripts and other materials.\n \"\"\"\n \n department: str = Field(\n ..., \n description=\"The division that holds the material (e.g., 'MANUSCRIPTS')\"\n )\n \n shelfmark: str = Field(\n ..., \n description=\"The library's classification/location code (e.g., 'SLOANE 3972.C. (VOL 1)')\"\n )\n \n order: str = Field(\n ..., \n description=\"Order reference, typically starting with 'SCH NO' followed by numbers\"\n )\n \n author: Optional[str] = Field(\n None, \n description=\"Author name if present, null if blank or marked with diagonal line\"\n )\n \n title: str = Field(\n ..., \n description=\"The name of the work or manuscript\"\n )\n \n place_and_date_of_publication: Optional[str] = Field(\n None, \n description=\"Place and date of publication if present, null if blank\"\n )\n \n reduction: int = Field(\n ..., \n description=\"The reduction number shown at the bottom of the card\"\n )\n\nWe’ll now create a function to handle the querying process using this structured schema.\n\ndef query_image_structured(image, prompt, schema, model='Qwen/Qwen3-VL-8B-Instruct'):\n \"\"\"\n Query VLM with an image and get structured output based on a Pydantic schema.\n \n Args:\n image: PIL Image or file path to the image\n prompt: Text prompt describing what to extract\n schema: Pydantic model class defining the expected output structure\n model: Model ID to use for the query\n \n Returns:\n Parsed Pydantic model instance with the extracted data\n \"\"\"\n # Convert image to base64\n if isinstance(image, PILImage):\n buffered = BytesIO()\n image.save(buffered, format=\"JPEG\")\n image_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')\n else:\n with open(image, \"rb\") as f:\n image_base64 = base64.b64encode(f.read()).decode('utf-8')\n \n # Query with structured output\n completion = client.beta.chat.completions.parse(\n model=model,\n messages=[{\n \"role\": \"user\",\n \"content\": [\n {\"type\": \"text\", \"text\": prompt},\n {\"type\": \"image_url\", \"image_url\": {\"url\": f\"data:image/jpeg;base64,{image_base64}\"}}\n ]\n }],\n response_format=schema,\n temperature=0.3 # Lower temperature for more consistent extraction\n )\n \n # Return the parsed structured data\n return completion.choices[0].message.parsed\n\nWe also need to define a prompt that describes what information we want to extract from the card.\n\n# Example usage\nextraction_prompt = \"\"\"\nExtract the information from this British Library card into structured data (JSON format).\n\nRead each field on the card and extract the following information:\n- department: The division name (e.g., \"MANUSCRIPTS\")\n- shelfmark: The catalog number (e.g., \"SLOANE 3972.C. 
(VOL 1)\")\n- order: The SCH NO reference number\n- author: The author name, or null if blank\n- title: The full title of the work\n- place_and_date_of_publication: Publication info, or null if blank\n- reduction: The reduction number (as integer) at bottom of card\n\nReturn the exact text as shown on the card. For empty fields with diagonal lines or no text, use null.\n\"\"\"\nresult = query_image_structured(index_image, extraction_prompt, BritishLibraryReprographicCard)\nrprint(result)\n\nBritishLibraryReprographicCard(\n department='MANUSCRIPTS',\n shelfmark='SLOANE 3972.C. (VOL 1)',\n order='SCH NO 98876',\n author=None,\n title='CATALOGUE OF SIR HANS SLOANES LIBRARY',\n place_and_date_of_publication=None,\n reduction=12\n)\n\n\n\n\nindex_image",
|
| 181 |
"crumbs": [
|
| 182 |
"Structured Information Extraction",
|
| 183 |
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
| 184 |
]
|
| 185 |
},
|
| 186 |
{
|
| 187 |
+
"objectID": "patterns/structured-generation/vlm-structured-generation.html#appendix-using-a-local-model",
|
| 188 |
+
"href": "patterns/structured-generation/vlm-structured-generation.html#appendix-using-a-local-model",
|
| 189 |
+
"title": "3 Structured Information Extraction with Vision Language Models",
|
| 190 |
+
"section": "3.8 Appendix: Using a Local Model",
|
| 191 |
+
"text": "3.8 Appendix: Using a Local Model\nAll the code in this chapter uses the OpenAI-compatible API, which means you can swap in a local model server with a single change to the client setup. Everything else — schemas, prompts, .parse() calls — works identically.\n\n# Replace the HF Inference client with a local server\nfrom openai import OpenAI\n\nclient = OpenAI(\n base_url=\"http://localhost:1234/v1\", # LM Studio default port\n api_key=\"lm-studio\" # Default API key\n)\n\nPopular local options:\n\n\n\nTool\nBest for\nNotes\n\n\n\n\nLM Studio\nGetting started quickly\nGUI-based, MLX acceleration on Mac, built-in model browser\n\n\nOllama\nCLI workflows\nSimple ollama run commands, runs on port 11434\n\n\nvLLM\nProduction & batch processing\nGPU-optimized, highest throughput, best for large-scale extraction\n\n\n\n\n\n\n\n\n\nNote\n\n\n\nSmaller local models (2B-4B parameters) work well for simpler tasks like classification, but for accurate structured extraction you’ll generally want 8B+ parameter models. The trade-off is between running costs/speed (local, smaller models) and extraction quality (API or larger models).",
|
| 192 |
"crumbs": [
|
| 193 |
"Structured Information Extraction",
|
| 194 |
+
"<span class='chapter-number'>3</span> <span class='chapter-title'>Structured Information Extraction with Vision Language Models</span>"
|
| 195 |
]
|
| 196 |
}
|
| 197 |
]
|
site_libs/quarto-diagram/mermaid-init.js
ADDED
|
@@ -0,0 +1,275 @@
|
| 1 |
+
// mermaid-init.js
|
| 2 |
+
// Initializes the quarto-mermaid JS runtime
|
| 3 |
+
//
|
| 4 |
+
// Copyright (C) 2022 Posit Software, PBC
|
| 5 |
+
|
| 6 |
+
/**
|
| 7 |
+
* String.prototype.replaceAll() polyfill
|
| 8 |
+
* https://gomakethings.com/how-to-replace-a-section-of-a-string-with-another-one-with-vanilla-js/
|
| 9 |
+
* @author Chris Ferdinandi
|
| 10 |
+
* @license MIT
|
| 11 |
+
*/
|
| 12 |
+
if (!String.prototype.replaceAll) {
|
| 13 |
+
String.prototype.replaceAll = function (str, newStr) {
|
| 14 |
+
// If a regex pattern
|
| 15 |
+
if (
|
| 16 |
+
Object.prototype.toString.call(str).toLowerCase() === "[object regexp]"
|
| 17 |
+
) {
|
| 18 |
+
return this.replace(str, newStr);
|
| 19 |
+
}
|
| 20 |
+
|
| 21 |
+
// If a string
|
| 22 |
+
return this.replace(new RegExp(str, "g"), newStr);
|
| 23 |
+
};
|
| 24 |
+
}
|
| 25 |
+
|
| 26 |
+
const mermaidOpts = {
|
| 27 |
+
startOnLoad: false,
|
| 28 |
+
};
|
| 29 |
+
// this CSS is adapted from
|
| 30 |
+
// mkdocs-material
|
| 31 |
+
// Copyright (c) 2016-2022 Martin Donath <martin.donath@squidfunk.com>
|
| 32 |
+
|
| 33 |
+
const defaultCSS =
|
| 34 |
+
".label text{fill:var(--mermaid-fg-color)}.node circle,.node ellipse,.node path,.node polygon,.node rect{fill:var(--mermaid-node-bg-color);stroke:var(--mermaid-node-fg-color)}marker{fill:var(--mermaid-edge-color)!important}.edgeLabel .label rect{fill:#0000}.label{color:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.label foreignObject{line-height:normal;overflow:visible}.label div .edgeLabel{color:var(--mermaid-label-fg-color)}.edgeLabel,.edgeLabel rect,.label div .edgeLabel{background-color:var(--mermaid-label-bg-color)}.edgeLabel,.edgeLabel rect{fill:var(--mermaid-label-bg-color);color:var(--mermaid-edge-color)}.edgePath .path,.flowchart-link{stroke:var(--mermaid-edge-color)}.edgePath .arrowheadPath{fill:var(--mermaid-edge-color);stroke:none}.cluster rect{fill:var(--mermaid-fg-color--lightest);stroke:var(--mermaid-fg-color--lighter)}.cluster span{color:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}defs #flowchart-circleEnd,defs #flowchart-circleStart,defs #flowchart-crossEnd,defs #flowchart-crossStart,defs #flowchart-pointEnd,defs #flowchart-pointStart{stroke:none}g.classGroup line,g.classGroup rect{fill:var(--mermaid-node-bg-color);stroke:var(--mermaid-node-fg-color)}g.classGroup text{fill:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.classLabel .box{fill:var(--mermaid-label-bg-color);background-color:var(--mermaid-label-bg-color);opacity:1}.classLabel .label{fill:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.node .divider{stroke:var(--mermaid-node-fg-color)}.relation{stroke:var(--mermaid-edge-color)}.cardinality{fill:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.cardinality text{fill:inherit!important}defs #classDiagram-compositionEnd,defs #classDiagram-compositionStart,defs #classDiagram-dependencyEnd,defs #classDiagram-dependencyStart,defs #classDiagram-extensionEnd,defs #classDiagram-extensionStart{fill:var(--mermaid-edge-color)!important;stroke:var(--mermaid-edge-color)!important}defs #classDiagram-aggregationEnd,defs #classDiagram-aggregationStart{fill:var(--mermaid-label-bg-color)!important;stroke:var(--mermaid-edge-color)!important}g.stateGroup rect{fill:var(--mermaid-node-bg-color);stroke:var(--mermaid-node-fg-color)}g.stateGroup .state-title{fill:var(--mermaid-label-fg-color)!important;font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}g.stateGroup .composit{fill:var(--mermaid-label-bg-color)}.nodeLabel{color:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.node circle.state-end,.node circle.state-start,.start-state{fill:var(--mermaid-edge-color);stroke:none}.end-state-inner,.end-state-outer{fill:var(--mermaid-edge-color)}.end-state-inner,.node circle.state-end{stroke:var(--mermaid-label-bg-color)}.transition{stroke:var(--mermaid-edge-color)}[id^=state-fork] rect,[id^=state-join] rect{fill:var(--mermaid-edge-color)!important;stroke:none!important}.statediagram-cluster.statediagram-cluster .inner{fill:var(--mermaid-bg-color)}.statediagram-cluster rect{fill:var(--mermaid-node-bg-color);stroke:var(--mermaid-node-fg-color)}.statediagram-state rect.divider{fill:var(--mermaid-fg-color--lightest);stroke:var(--mermaid-fg-color--lighter)}defs 
#statediagram-barbEnd{stroke:var(--mermaid-edge-color)}.entityBox{fill:var(--mermaid-label-bg-color);stroke:var(--mermaid-node-fg-color)}.entityLabel{fill:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}.relationshipLabelBox{fill:var(--mermaid-label-bg-color);fill-opacity:1;background-color:var(--mermaid-label-bg-color);opacity:1}.relationshipLabel{fill:var(--mermaid-label-fg-color)}.relationshipLine{stroke:var(--mermaid-edge-color)}defs #ONE_OR_MORE_END *,defs #ONE_OR_MORE_START *,defs #ONLY_ONE_END *,defs #ONLY_ONE_START *,defs #ZERO_OR_MORE_END *,defs #ZERO_OR_MORE_START *,defs #ZERO_OR_ONE_END *,defs #ZERO_OR_ONE_START *{stroke:var(--mermaid-edge-color)!important}.actor,defs #ZERO_OR_MORE_END circle,defs #ZERO_OR_MORE_START circle{fill:var(--mermaid-label-bg-color)}.actor{stroke:var(--mermaid-node-fg-color)}text.actor>tspan{fill:var(--mermaid-label-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}line{stroke:var(--mermaid-fg-color--lighter)}.messageLine0,.messageLine1{stroke:var(--mermaid-edge-color)}.loopText>tspan,.messageText,.noteText>tspan{fill:var(--mermaid-edge-color);stroke:none;font-family:var(--mermaid-font-family)!important;font-weight:var(--mermaid-font-weight)!important}.noteText>tspan{fill:#000}#arrowhead path{fill:var(--mermaid-edge-color);stroke:none}.loopLine{stroke:var(--mermaid-node-fg-color)}.labelBox,.loopLine{fill:var(--mermaid-node-bg-color)}.labelBox{stroke:none}.labelText,.labelText>span{fill:var(--mermaid-node-fg-color);font-family:var(--mermaid-font-family);font-weight:var(--mermaid-font-weight)}";
|
| 35 |
+
|
| 36 |
+
const mermaidThemeEl = document.querySelector('meta[name="mermaid-theme"]');
|
| 37 |
+
if (mermaidThemeEl) {
|
| 38 |
+
mermaidOpts.theme = mermaidThemeEl.content;
|
| 39 |
+
} else {
|
| 40 |
+
mermaidOpts.themeCSS = defaultCSS;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
mermaid.initialize(mermaidOpts);
|
| 44 |
+
|
| 45 |
+
const _quartoMermaid = {
|
| 46 |
+
// NB: there's effectively a copy of this function
|
| 47 |
+
// in `core/svg.ts`.
|
| 48 |
+
// if you change something here, you must keep it consistent there as well.
|
| 49 |
+
setSvgSize(svg) {
|
| 50 |
+
const { widthInPoints, heightInPoints, explicitHeight, explicitWidth } =
|
| 51 |
+
this.resolveSize(svg);
|
| 52 |
+
|
| 53 |
+
if (explicitWidth && explicitHeight) {
|
| 54 |
+
svg.setAttribute("width", widthInPoints);
|
| 55 |
+
svg.setAttribute("height", heightInPoints);
|
| 56 |
+
svg.style.maxWidth = null; // remove mermaid's default max-width
|
| 57 |
+
} else {
|
| 58 |
+
if (explicitWidth) {
|
| 59 |
+
svg.style.maxWidth = `${widthInPoints}px`;
|
| 60 |
+
}
|
| 61 |
+
if (explicitHeight) {
|
| 62 |
+
svg.style.maxHeight = `${heightInPoints}px`;
|
| 63 |
+
}
|
| 64 |
+
}
|
| 65 |
+
},
|
| 66 |
+
|
| 67 |
+
// NB: there's effectively a copy of this function
|
| 68 |
+
// in `core/svg.ts`.
|
| 69 |
+
// if you change something here, you must keep it consistent there as well.
|
| 70 |
+
makeResponsive(svg) {
|
| 71 |
+
const width = svg.getAttribute("width");
|
| 72 |
+
if (width === null) {
|
| 73 |
+
throw new Error("Couldn't find SVG width");
|
| 74 |
+
}
|
| 75 |
+
const numWidth = Number(width.slice(0, -2));
|
| 76 |
+
|
| 77 |
+
if (numWidth > 650) {
|
| 78 |
+
changed = true;
|
| 79 |
+
svg.setAttribute("width", "100%");
|
| 80 |
+
svg.removeAttribute("height");
|
| 81 |
+
}
|
| 82 |
+
},
|
| 83 |
+
|
| 84 |
+
// NB: there's effectively a copy of this function
|
| 85 |
+
// in `core/svg.ts`.
|
| 86 |
+
// if you change something here, you must keep it consistent there as well.
|
| 87 |
+
fixupAlignment(svg, align) {
|
| 88 |
+
let style = svg.getAttribute("style") || "";
|
| 89 |
+
|
| 90 |
+
switch (align) {
|
| 91 |
+
case "left":
|
| 92 |
+
style = `${style}; display: block; margin: auto auto auto 0`;
|
| 93 |
+
break;
|
| 94 |
+
case "right":
|
| 95 |
+
style = `${style}; display: block; margin: auto 0 auto auto`;
|
| 96 |
+
break;
|
| 97 |
+
case "center":
|
| 98 |
+
style = `${style}; display: block; margin: auto auto auto auto`;
|
| 99 |
+
break;
|
| 100 |
+
}
|
| 101 |
+
svg.setAttribute("style", style);
|
| 102 |
+
},
|
| 103 |
+
|
| 104 |
+
resolveOptions(svgEl) {
|
| 105 |
+
return svgEl.parentElement.parentElement.parentElement.parentElement
|
| 106 |
+
.dataset;
|
| 107 |
+
},
|
| 108 |
+
|
| 109 |
+
// NB: there's effectively a copy of this function
|
| 110 |
+
// in our mermaid runtime in `core/svg.ts`.
|
| 111 |
+
// if you change something here, you must keep it consistent there as well.
|
| 112 |
+
resolveSize(svgEl) {
|
| 113 |
+
const inInches = (size) => {
|
| 114 |
+
if (size.endsWith("in")) {
|
| 115 |
+
return Number(size.slice(0, -2));
|
| 116 |
+
}
|
| 117 |
+
if (size.endsWith("pt") || size.endsWith("px")) {
|
| 118 |
+
// assume 96 dpi for now
|
| 119 |
+
return Number(size.slice(0, -2)) / 96;
|
| 120 |
+
}
|
| 121 |
+
return Number(size);
|
| 122 |
+
};
|
| 123 |
+
|
| 124 |
+
// these are figWidth and figHeight on purpose,
|
| 125 |
+
// because data attributes are translated to camelCase by the DOM API
|
| 126 |
+
const kFigWidth = "figWidth",
|
| 127 |
+
kFigHeight = "figHeight";
|
| 128 |
+
const options = this.resolveOptions(svgEl);
|
| 129 |
+
let width = svgEl.getAttribute("width");
|
| 130 |
+
let height = svgEl.getAttribute("height");
|
| 131 |
+
const getViewBox = () => {
|
| 132 |
+
const vb = svgEl.attributes.getNamedItem("viewBox").value; // do it the roundabout way so that viewBox isn't dropped by deno_dom and text/html
|
| 133 |
+
if (!vb) return undefined;
|
| 134 |
+
const lst = vb.trim().split(" ").map(Number);
|
| 135 |
+
if (lst.length !== 4) return undefined;
|
| 136 |
+
if (lst.some(isNaN)) return undefined;
|
| 137 |
+
return lst;
|
| 138 |
+
};
|
| 139 |
+
if (!width || !height) {
|
| 140 |
+
// attempt to resolve figure dimensions via viewBox
|
| 141 |
+
const viewBox = getViewBox();
|
| 142 |
+
if (viewBox !== undefined) {
|
| 143 |
+
const [_mx, _my, vbWidth, vbHeight] = viewBox;
|
| 144 |
+
width = `${vbWidth}px`;
|
| 145 |
+
height = `${vbHeight}px`;
|
| 146 |
+
} else {
|
| 147 |
+
throw new Error(
|
| 148 |
+
"Mermaid generated an SVG without a viewbox attribute. Without knowing the diagram dimensions, quarto cannot convert it to a PNG"
|
| 149 |
+
);
|
| 150 |
+
}
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
let svgWidthInInches, svgHeightInInches;
|
| 154 |
+
|
| 155 |
+
if (
|
| 156 |
+
(width.slice(0, -2) === "pt" && height.slice(0, -2) === "pt") ||
|
| 157 |
+
(width.slice(0, -2) === "px" && height.slice(0, -2) === "px") ||
|
| 158 |
+
(!isNaN(Number(width)) && !isNaN(Number(height)))
|
| 159 |
+
) {
|
| 160 |
+
// we assume 96 dpi which is generally what seems to be used.
|
| 161 |
+
svgWidthInInches = Number(width.slice(0, -2)) / 96;
|
| 162 |
+
svgHeightInInches = Number(height.slice(0, -2)) / 96;
|
| 163 |
+
}
|
| 164 |
+
const viewBox = getViewBox();
|
| 165 |
+
if (viewBox !== undefined) {
|
| 166 |
+
// assume width and height come from viewbox.
|
| 167 |
+
const [_mx, _my, vbWidth, vbHeight] = viewBox;
|
| 168 |
+
svgWidthInInches = vbWidth / 96;
|
| 169 |
+
svgHeightInInches = vbHeight / 96;
|
| 170 |
+
} else {
|
| 171 |
+
throw new Error(
|
| 172 |
+
"Internal Error: Couldn't resolve width and height of SVG"
|
| 173 |
+
);
|
| 174 |
+
}
|
| 175 |
+
const svgWidthOverHeight = svgWidthInInches / svgHeightInInches;
|
| 176 |
+
let widthInInches, heightInInches;
|
| 177 |
+
|
| 178 |
+
if (options[kFigWidth] && options[kFigHeight]) {
|
| 179 |
+
// both were prescribed, so just go with them
|
| 180 |
+
widthInInches = inInches(String(options[kFigWidth]));
|
| 181 |
+
heightInInches = inInches(String(options[kFigHeight]));
|
| 182 |
+
} else if (options[kFigWidth]) {
|
| 183 |
+
// we were only given width, use that and adjust height based on aspect ratio;
|
| 184 |
+
widthInInches = inInches(String(options[kFigWidth]));
|
| 185 |
+
heightInInches = widthInInches / svgWidthOverHeight;
|
| 186 |
+
} else if (options[kFigHeight]) {
|
| 187 |
+
// we were only given height, use that and adjust width based on aspect ratio;
|
| 188 |
+
heightInInches = inInches(String(options[kFigHeight]));
|
| 189 |
+
widthInInches = heightInInches * svgWidthOverHeight;
|
| 190 |
+
} else {
|
| 191 |
+
// we were not given either, use svg's prescribed height
|
| 192 |
+
heightInInches = svgHeightInInches;
|
| 193 |
+
widthInInches = svgWidthInInches;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
return {
|
| 197 |
+
widthInInches,
|
| 198 |
+
heightInInches,
|
| 199 |
+
widthInPoints: Math.round(widthInInches * 96),
|
| 200 |
+
heightInPoints: Math.round(heightInInches * 96),
|
| 201 |
+
explicitWidth: options?.[kFigWidth] !== undefined,
|
| 202 |
+
explicitHeight: options?.[kFigHeight] !== undefined,
|
| 203 |
+
};
|
| 204 |
+
},
|
| 205 |
+
|
| 206 |
+
postProcess(svg) {
|
| 207 |
+
const options = this.resolveOptions(svg);
|
| 208 |
+
if (
|
| 209 |
+
options.responsive &&
|
| 210 |
+
options["figWidth"] === undefined &&
|
| 211 |
+
options["figHeight"] === undefined
|
| 212 |
+
) {
|
| 213 |
+
this.makeResponsive(svg);
|
| 214 |
+
} else {
|
| 215 |
+
this.setSvgSize(svg);
|
| 216 |
+
}
|
| 217 |
+
if (options["reveal"]) {
|
| 218 |
+
this.fixupAlignment(svg, options["figAlign"] || "center");
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
// forward align attributes to the correct parent dif
|
| 222 |
+
// so that the svg figure is aligned correctly
|
| 223 |
+
const div = svg.parentElement.parentElement.parentElement;
|
| 224 |
+
const align = div.parentElement.parentElement.dataset.layoutAlign;
|
| 225 |
+
if (align) {
|
| 226 |
+
div.classList.remove("quarto-figure-left");
|
| 227 |
+
div.classList.remove("quarto-figure-center");
|
| 228 |
+
div.classList.remove("quarto-figure-right");
|
| 229 |
+
div.classList.add(`quarto-figure-${align}`);
|
| 230 |
+
}
|
| 231 |
+
},
|
| 232 |
+
};
|
| 233 |
+
|
| 234 |
+
// deno-lint-ignore no-window-prefix
|
| 235 |
+
window.addEventListener(
|
| 236 |
+
"load",
|
| 237 |
+
async function () {
|
| 238 |
+
let i = 0;
|
| 239 |
+
// we need pre because of whitespace preservation
|
| 240 |
+
for (const el of Array.from(document.querySelectorAll("pre.mermaid-js"))) {
|
| 241 |
+
// doesn't appear to be treated as whitespace by mermaid
|
| 242 |
+
// so we replace it with a space.
|
| 243 |
+
const text = el.textContent.replaceAll(" ", " ");
|
| 244 |
+
const { svg: output } = await mermaid.mermaidAPI.render(
|
| 245 |
+
`mermaid-${++i}`,
|
| 246 |
+
text,
|
| 247 |
+
el
|
| 248 |
+
);
|
| 249 |
+
el.innerHTML = output;
|
| 250 |
+
if (el.dataset.label) {
|
| 251 |
+
// patch mermaid's emitted style
|
| 252 |
+
const svg = el.firstChild;
|
| 253 |
+
const style = svg.querySelector("style");
|
| 254 |
+
style.innerHTML = style.innerHTML.replaceAll(
|
| 255 |
+
`#${svg.id}`,
|
| 256 |
+
`#${el.dataset.label}-mermaid`
|
| 257 |
+
);
|
| 258 |
+
svg.id = el.dataset.label + "-mermaid";
|
| 259 |
+
delete el.dataset.label;
|
| 260 |
+
}
|
| 261 |
+
|
| 262 |
+
const svg = el.querySelector("svg");
|
| 263 |
+
const parent = el.parentElement;
|
| 264 |
+
parent.removeChild(el);
|
| 265 |
+
parent.appendChild(svg);
|
| 266 |
+
svg.classList.add("mermaid-js");
|
| 267 |
+
}
|
| 268 |
+
for (const svgEl of Array.from(
|
| 269 |
+
document.querySelectorAll("svg.mermaid-js")
|
| 270 |
+
)) {
|
| 271 |
+
_quartoMermaid.postProcess(svgEl);
|
| 272 |
+
}
|
| 273 |
+
},
|
| 274 |
+
false
|
| 275 |
+
);
|
site_libs/quarto-diagram/mermaid.css
ADDED
|
@@ -0,0 +1,13 @@
|
| 1 |
+
.mermaidTooltip {
|
| 2 |
+
position: absolute;
|
| 3 |
+
text-align: center;
|
| 4 |
+
max-width: 200px;
|
| 5 |
+
padding: 2px;
|
| 6 |
+
font-family: "trebuchet ms", verdana, arial;
|
| 7 |
+
font-size: 12px;
|
| 8 |
+
background: #ffffde;
|
| 9 |
+
border: 1px solid #aaaa33;
|
| 10 |
+
border-radius: 2px;
|
| 11 |
+
pointer-events: none;
|
| 12 |
+
z-index: 1000;
|
| 13 |
+
}
|
site_libs/quarto-diagram/mermaid.min.js
ADDED
|
The diff for this file is too large to render.
See raw diff
|
site_libs/quarto-html/quarto-syntax-highlighting-ed96de9b727972fe78a7b5d16c58bf87.css
ADDED
|
@@ -0,0 +1,236 @@
|
| 1 |
+
/* quarto syntax highlight colors */
|
| 2 |
+
:root {
|
| 3 |
+
--quarto-hl-ot-color: #003B4F;
|
| 4 |
+
--quarto-hl-at-color: #657422;
|
| 5 |
+
--quarto-hl-ss-color: #20794D;
|
| 6 |
+
--quarto-hl-an-color: #5E5E5E;
|
| 7 |
+
--quarto-hl-fu-color: #4758AB;
|
| 8 |
+
--quarto-hl-st-color: #20794D;
|
| 9 |
+
--quarto-hl-cf-color: #003B4F;
|
| 10 |
+
--quarto-hl-op-color: #5E5E5E;
|
| 11 |
+
--quarto-hl-er-color: #AD0000;
|
| 12 |
+
--quarto-hl-bn-color: #AD0000;
|
| 13 |
+
--quarto-hl-al-color: #AD0000;
|
| 14 |
+
--quarto-hl-va-color: #111111;
|
| 15 |
+
--quarto-hl-bu-color: inherit;
|
| 16 |
+
--quarto-hl-ex-color: inherit;
|
| 17 |
+
--quarto-hl-pp-color: #AD0000;
|
| 18 |
+
--quarto-hl-in-color: #5E5E5E;
|
| 19 |
+
--quarto-hl-vs-color: #20794D;
|
| 20 |
+
--quarto-hl-wa-color: #5E5E5E;
|
| 21 |
+
--quarto-hl-do-color: #5E5E5E;
|
| 22 |
+
--quarto-hl-im-color: #00769E;
|
| 23 |
+
--quarto-hl-ch-color: #20794D;
|
| 24 |
+
--quarto-hl-dt-color: #AD0000;
|
| 25 |
+
--quarto-hl-fl-color: #AD0000;
|
| 26 |
+
--quarto-hl-co-color: #5E5E5E;
|
| 27 |
+
--quarto-hl-cv-color: #5E5E5E;
|
| 28 |
+
--quarto-hl-cn-color: #8f5902;
|
| 29 |
+
--quarto-hl-sc-color: #5E5E5E;
|
| 30 |
+
--quarto-hl-dv-color: #AD0000;
|
| 31 |
+
--quarto-hl-kw-color: #003B4F;
|
| 32 |
+
}
|
| 33 |
+
|
| 34 |
+
/* other quarto variables */
|
| 35 |
+
:root {
|
| 36 |
+
--quarto-font-monospace: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
/* syntax highlight based on Pandoc's rules */
|
| 40 |
+
pre > code.sourceCode > span {
|
| 41 |
+
color: #003B4F;
|
| 42 |
+
}
|
| 43 |
+
|
| 44 |
+
code.sourceCode > span {
|
| 45 |
+
color: #003B4F;
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
div.sourceCode,
|
| 49 |
+
div.sourceCode pre.sourceCode {
|
| 50 |
+
color: #003B4F;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
/* Normal */
|
| 54 |
+
code span {
|
| 55 |
+
color: #003B4F;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
/* Alert */
|
| 59 |
+
code span.al {
|
| 60 |
+
color: #AD0000;
|
| 61 |
+
font-style: inherit;
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
/* Annotation */
|
| 65 |
+
code span.an {
|
| 66 |
+
color: #5E5E5E;
|
| 67 |
+
font-style: inherit;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
/* Attribute */
|
| 71 |
+
code span.at {
|
| 72 |
+
color: #657422;
|
| 73 |
+
font-style: inherit;
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
/* BaseN */
|
| 77 |
+
code span.bn {
|
| 78 |
+
color: #AD0000;
|
| 79 |
+
font-style: inherit;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
/* BuiltIn */
|
| 83 |
+
code span.bu {
|
| 84 |
+
font-style: inherit;
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* ControlFlow */
|
| 88 |
+
code span.cf {
|
| 89 |
+
color: #003B4F;
|
| 90 |
+
font-weight: bold;
|
| 91 |
+
font-style: inherit;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
/* Char */
|
| 95 |
+
code span.ch {
|
| 96 |
+
color: #20794D;
|
| 97 |
+
font-style: inherit;
|
| 98 |
+
}
|
| 99 |
+
|
| 100 |
+
/* Constant */
|
| 101 |
+
code span.cn {
|
| 102 |
+
color: #8f5902;
|
| 103 |
+
font-style: inherit;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
/* Comment */
|
| 107 |
+
code span.co {
|
| 108 |
+
color: #5E5E5E;
|
| 109 |
+
font-style: inherit;
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
/* CommentVar */
|
| 113 |
+
code span.cv {
|
| 114 |
+
color: #5E5E5E;
|
| 115 |
+
font-style: italic;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
/* Documentation */
|
| 119 |
+
code span.do {
|
| 120 |
+
color: #5E5E5E;
|
| 121 |
+
font-style: italic;
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
/* DataType */
|
| 125 |
+
code span.dt {
|
| 126 |
+
color: #AD0000;
|
| 127 |
+
font-style: inherit;
|
| 128 |
+
}
|
| 129 |
+
|
| 130 |
+
/* DecVal */
|
| 131 |
+
code span.dv {
|
| 132 |
+
color: #AD0000;
|
| 133 |
+
font-style: inherit;
|
| 134 |
+
}
|
| 135 |
+
|
| 136 |
+
/* Error */
|
| 137 |
+
code span.er {
|
| 138 |
+
color: #AD0000;
|
| 139 |
+
font-style: inherit;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
/* Extension */
|
| 143 |
+
code span.ex {
|
| 144 |
+
font-style: inherit;
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
/* Float */
|
| 148 |
+
code span.fl {
|
| 149 |
+
color: #AD0000;
|
| 150 |
+
font-style: inherit;
|
| 151 |
+
}
|
| 152 |
+
|
| 153 |
+
/* Function */
|
| 154 |
+
code span.fu {
|
| 155 |
+
color: #4758AB;
|
| 156 |
+
font-style: inherit;
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
/* Import */
|
| 160 |
+
code span.im {
|
| 161 |
+
color: #00769E;
|
| 162 |
+
font-style: inherit;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
/* Information */
|
| 166 |
+
code span.in {
|
| 167 |
+
color: #5E5E5E;
|
| 168 |
+
font-style: inherit;
|
| 169 |
+
}
|
| 170 |
+
|
| 171 |
+
/* Keyword */
|
| 172 |
+
code span.kw {
|
| 173 |
+
color: #003B4F;
|
| 174 |
+
font-weight: bold;
|
| 175 |
+
font-style: inherit;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
/* Operator */
|
| 179 |
+
code span.op {
|
| 180 |
+
color: #5E5E5E;
|
| 181 |
+
font-style: inherit;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
/* Other */
|
| 185 |
+
code span.ot {
|
| 186 |
+
color: #003B4F;
|
| 187 |
+
font-style: inherit;
|
| 188 |
+
}
|
| 189 |
+
|
| 190 |
+
/* Preprocessor */
|
| 191 |
+
code span.pp {
|
| 192 |
+
color: #AD0000;
|
| 193 |
+
font-style: inherit;
|
| 194 |
+
}
|
| 195 |
+
|
| 196 |
+
/* SpecialChar */
|
| 197 |
+
code span.sc {
|
| 198 |
+
color: #5E5E5E;
|
| 199 |
+
font-style: inherit;
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
/* SpecialString */
|
| 203 |
+
code span.ss {
|
| 204 |
+
color: #20794D;
|
| 205 |
+
font-style: inherit;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
/* String */
|
| 209 |
+
code span.st {
|
| 210 |
+
color: #20794D;
|
| 211 |
+
font-style: inherit;
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
/* Variable */
|
| 215 |
+
code span.va {
|
| 216 |
+
color: #111111;
|
| 217 |
+
font-style: inherit;
|
| 218 |
+
}
|
| 219 |
+
|
| 220 |
+
/* VerbatimString */
|
| 221 |
+
code span.vs {
|
| 222 |
+
color: #20794D;
|
| 223 |
+
font-style: inherit;
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
/* Warning */
|
| 227 |
+
code span.wa {
|
| 228 |
+
color: #5E5E5E;
|
| 229 |
+
font-style: italic;
|
| 230 |
+
}
|
| 231 |
+
|
| 232 |
+
.prevent-inlining {
|
| 233 |
+
content: "</";
|
| 234 |
+
}
|
| 235 |
+
|
| 236 |
+
/*# sourceMappingURL=0920dd6d7437995b8cdf7429764427b1.css.map */
|