Upload 3 files

Browse files

Files changed (3) hide show

20250803_langextract/extraction_results.jsonl +1 -0
20250803_langextract/test.ipynb +809 -0
20250803_langextract/visualization.html +191 -0

20250803_langextract/extraction_results.jsonl ADDED Viewed

	@@ -0,0 +1 @@

+ {"extractions": [{"extraction_class": "depature_date", "extraction_text": "2025/08/05", "char_interval": null, "alignment_status": null, "extraction_index": 1, "group_index": 0, "description": null, "attributes": {}}, {"extraction_class": "arrival_date", "extraction_text": "2025/08/04", "char_interval": null, "alignment_status": null, "extraction_index": 2, "group_index": 1, "description": null, "attributes": {}}, {"extraction_class": "name", "extraction_text": "nakamura john", "char_interval": {"start_pos": 31, "end_pos": 44}, "alignment_status": "match_exact", "extraction_index": 3, "group_index": 2, "description": null, "attributes": {}}, {"extraction_class": "fright_name", "extraction_text": "cx0009", "char_interval": {"start_pos": 55, "end_pos": 61}, "alignment_status": "match_exact", "extraction_index": 4, "group_index": 3, "description": null, "attributes": {}}], "text": "[dat]20250805[dat]20250804[nam]nakamura john[age]30[br]cx0009[fr]ar0520", "document_id": "doc_c6b4f79c"}

20250803_langextract/test.ipynb ADDED Viewed

	@@ -0,0 +1,809 @@

+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "3bf0e2df",
+   "metadata": {},
+   "source": [
+    "# sample test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "eb638e6d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import langextract as lx\n",
+    "import textwrap\n",
+    "from langextract import inference\n",
+    "\n",
+    "# 1. Define the prompt and extraction rules\n",
+    "prompt = textwrap.dedent(\"\"\"\\\n",
+    "    Extract characters, emotions, and relationships in order of appearance.\n",
+    "    Use exact text for extractions. Do not paraphrase or overlap entities.\n",
+    "    Provide meaningful attributes for each entity to add context.\"\"\")\n",
+    "\n",
+    "# 2. Provide a high-quality example to guide the model\n",
+    "examples = [\n",
+    "    lx.data.ExampleData(\n",
+    "        text=\"ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.\",\n",
+    "        extractions=[\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"character\",\n",
+    "                extraction_text=\"ROMEO\",\n",
+    "                attributes={\"emotional_state\": \"wonder\"}\n",
+    "            ),\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"emotion\",\n",
+    "                extraction_text=\"But soft!\",\n",
+    "                attributes={\"feeling\": \"gentle awe\"}\n",
+    "            ),\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"relationship\",\n",
+    "                extraction_text=\"Juliet is the sun\",\n",
+    "                attributes={\"type\": \"metaphor\"}\n",
+    "            ),\n",
+    "        ]\n",
+    "    )\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "c72822d9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m68\u001b[0m chars, processed=\u001b[92m68\u001b[0m chars:  [00:11]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Extraction processing complete\n",
+      "\u001b[92m✓\u001b[0m Extracted \u001b[1m3\u001b[0m entities (\u001b[1m3\u001b[0m unique types)\n",
+      "  \u001b[96m•\u001b[0m Time: \u001b[1m11.09s\u001b[0m\n",
+      "  \u001b[96m•\u001b[0m Speed: \u001b[1m6\u001b[0m chars/sec\n",
+      "  \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The input text to be processed\n",
+    "input_text = \"Lady Juliet gazed longingly at the stars, her heart aching for Romeo\"\n",
+    "\n",
+    "# Run the extraction\n",
+    "result = lx.extract(\n",
+    "    text_or_documents=input_text,\n",
+    "    prompt_description=prompt,\n",
+    "    examples=examples,\n",
+    "    language_model_type=inference.OllamaLanguageModel,\n",
+    "    model_id=\"gemma2:latest\",\n",
+    "    model_url=\"http://localhost:11434\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "a0c64fc9",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mType:\u001b[39m        AnnotatedDocument\n",
+      "\u001b[31mString form:\u001b[39m AnnotatedDocument(extractions=[Extraction(extraction_class='character', extraction_text='Lady Jul <...> ={'type': 'love'})], text='Lady Juliet gazed longingly at the stars, her heart aching for Romeo')\n",
+      "\u001b[31mFile:\u001b[39m        c:\\users\\kenta\\appdata\\local\\programs\\python\\python312\\lib\\site-packages\\langextract\\data.py\n",
+      "\u001b[31mDocstring:\u001b[39m  \n",
+      "Class for representing annotated documents.\n",
+      "\n",
+      "Attributes:\n",
+      "  document_id: Unique identifier for each document - autogenerated if not\n",
+      "    set.\n",
+      "  extractions: List of extractions in the document.\n",
+      "  text: Raw text representation of the document.\n",
+      "  tokenized_text: Tokenized text of the document, computed from `text`."
+     ]
+    }
+   ],
+   "source": [
+    "?result"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "af83d97e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Extraction(extraction_class='character', extraction_text='Lady Juliet', char_interval=CharInterval(start_pos=0, end_pos=11), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'emotional_state': 'longing'}),\n",
+       " Extraction(extraction_class='emotion', extraction_text='aching', char_interval=CharInterval(start_pos=52, end_pos=58), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'feeling': 'sorrow'}),\n",
+       " Extraction(extraction_class='relationship', extraction_text='Lady Juliet... for Romeo', char_interval=CharInterval(start_pos=0, end_pos=68), alignment_status=<AlignmentStatus.MATCH_FUZZY: 'match_fuzzy'>, extraction_index=3, group_index=2, description=None, attributes={'type': 'love'})]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result.extractions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "aadaf861",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "3622840e",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Saving to \u001b[92mextraction_results.jsonl\u001b[0m: 1 docs [00:00, 501.95 docs/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Saved \u001b[1m1\u001b[0m documents to \u001b[92mextraction_results.jsonl\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Loading \u001b[92mextraction_results.jsonl\u001b[0m: 100%|█████████▉| 918/919 [00:00<00:00, 230kB/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Loaded \u001b[1m1\u001b[0m documents from \u001b[92mextraction_results.jsonl\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "7947"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Save the results to a JSONL file\n",
+    "from pathlib import Path\n",
+    "lx.io.save_annotated_documents([result], output_name=\"extraction_results.jsonl\", output_dir=Path(\".\"))\n",
+    "\n",
+    "# Generate the visualization from the file\n",
+    "html_content = lx.visualize(\"extraction_results.jsonl\")\n",
+    "# Get the HTML body string and write it to a file\n",
+    "html_str: str = html_content.data  # the HTML string is stored in .data\n",
+    "output_path = Path(\"visualization.html\")\n",
+    "output_path.write_text(html_str, encoding=\"utf-8\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "16c245f1",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style>\n",
+       ".lx-highlight { position: relative; border-radius:3px; padding:1px 2px;}\n",
+       ".lx-highlight .lx-tooltip {\n",
+       "  visibility: hidden;\n",
+       "  opacity: 0;\n",
+       "  transition: opacity 0.2s ease-in-out;\n",
+       "  background: #333;\n",
+       "  color: #fff;\n",
+       "  text-align: left;\n",
+       "  border-radius: 4px;\n",
+       "  padding: 6px 8px;\n",
+       "  position: absolute;\n",
+       "  z-index: 1000;\n",
+       "  bottom: 125%;\n",
+       "  left: 50%;\n",
+       "  transform: translateX(-50%);\n",
+       "  font-size: 12px;\n",
+       "  max-width: 240px;\n",
+       "  white-space: normal;\n",
+       "  box-shadow: 0 2px 6px rgba(0,0,0,0.3);\n",
+       "}\n",
+       ".lx-highlight:hover .lx-tooltip { visibility: visible; opacity:1; }\n",
+       ".lx-animated-wrapper { max-width: 100%; font-family: Arial, sans-serif; }\n",
+       ".lx-controls {\n",
+       "  background: #fafafa; border: 1px solid #90caf9; border-radius: 8px;\n",
+       "  padding: 12px; margin-bottom: 16px;\n",
+       "}\n",
+       ".lx-button-row {\n",
+       "  display: flex; justify-content: center; gap: 8px; margin-bottom: 12px;\n",
+       "}\n",
+       ".lx-control-btn {\n",
+       "  background: #4285f4; color: white; border: none; border-radius: 4px;\n",
+       "  padding: 8px 16px; cursor: pointer; font-size: 13px; font-weight: 500;\n",
+       "  transition: background-color 0.2s;\n",
+       "}\n",
+       ".lx-control-btn:hover { background: #3367d6; }\n",
+       ".lx-progress-container {\n",
+       "  margin-bottom: 8px;\n",
+       "}\n",
+       ".lx-progress-slider {\n",
+       "  width: 100%; margin: 0; appearance: none; height: 6px;\n",
+       "  background: #ddd; border-radius: 3px; outline: none;\n",
+       "}\n",
+       ".lx-progress-slider::-webkit-slider-thumb {\n",
+       "  appearance: none; width: 18px; height: 18px; background: #4285f4;\n",
+       "  border-radius: 50%; cursor: pointer;\n",
+       "}\n",
+       ".lx-progress-slider::-moz-range-thumb {\n",
+       "  width: 18px; height: 18px; background: #4285f4; border-radius: 50%;\n",
+       "  cursor: pointer; border: none;\n",
+       "}\n",
+       ".lx-status-text {\n",
+       "  text-align: center; font-size: 12px; color: #666; margin-top: 4px;\n",
+       "}\n",
+       ".lx-text-window {\n",
+       "  font-family: monospace; white-space: pre-wrap; border: 1px solid #90caf9;\n",
+       "  padding: 12px; max-height: 260px; overflow-y: auto; margin-bottom: 12px;\n",
+       "  line-height: 1.6;\n",
+       "}\n",
+       ".lx-attributes-panel {\n",
+       "  background: #fafafa; border: 1px solid #90caf9; border-radius: 6px;\n",
+       "  padding: 8px 10px; margin-top: 8px; font-size: 13px;\n",
+       "}\n",
+       ".lx-current-highlight {\n",
+       "  text-decoration: underline;\n",
+       "  text-decoration-color: #ff4444;\n",
+       "  text-decoration-thickness: 3px;\n",
+       "  font-weight: bold;\n",
+       "  animation: lx-pulse 1s ease-in-out;\n",
+       "}\n",
+       "@keyframes lx-pulse {\n",
+       "  0% { text-decoration-color: #ff4444; }\n",
+       "  50% { text-decoration-color: #ff0000; }\n",
+       "  100% { text-decoration-color: #ff4444; }\n",
+       "}\n",
+       ".lx-legend {\n",
+       "  font-size: 12px; margin-bottom: 8px;\n",
+       "  padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;\n",
+       "}\n",
+       ".lx-label {\n",
+       "  display: inline-block;\n",
+       "  padding: 2px 4px;\n",
+       "  border-radius: 3px;\n",
+       "  margin-right: 4px;\n",
+       "  color: #000;\n",
+       "}\n",
+       ".lx-attr-key {\n",
+       "  font-weight: 600;\n",
+       "  color: #1565c0;\n",
+       "  letter-spacing: 0.3px;\n",
+       "}\n",
+       ".lx-attr-value {\n",
+       "  font-weight: 400;\n",
+       "  opacity: 0.85;\n",
+       "  letter-spacing: 0.2px;\n",
+       "}\n",
+       "\n",
+       "/* Add optimizations with larger fonts and better readability for GIFs */\n",
+       ".lx-gif-optimized .lx-text-window { font-size: 16px; line-height: 1.8; }\n",
+       ".lx-gif-optimized .lx-attributes-panel { font-size: 15px; }\n",
+       ".lx-gif-optimized .lx-current-highlight { text-decoration-thickness: 4px; }\n",
+       "</style>\n",
+       "<div class=\"lx-animated-wrapper lx-gif-optimized\">\n",
+       "  <div class=\"lx-attributes-panel\">\n",
+       "    <div class=\"lx-legend\">Highlights Legend: <span class=\"lx-label\" style=\"background-color:#D2E3FC;\">character</span> <span class=\"lx-label\" style=\"background-color:#C8E6C9;\">emotion</span> <span class=\"lx-label\" style=\"background-color:#FEF0C3;\">relationship</span></div>\n",
+       "    <div id=\"attributesContainer\"></div>\n",
+       "  </div>\n",
+       "  <div class=\"lx-text-window\" id=\"textWindow\">\n",
+       "    <span class=\"lx-highlight lx-current-highlight\" data-idx=\"0\" style=\"background-color:#FEF0C3;\"><span class=\"lx-highlight\" data-idx=\"1\" style=\"background-color:#D2E3FC;\">Lady Juliet</span> gazed longingly at the stars, her heart <span class=\"lx-highlight\" data-idx=\"2\" style=\"background-color:#C8E6C9;\">aching</span> for Romeo</span>\n",
+       "  </div>\n",
+       "  <div class=\"lx-controls\">\n",
+       "    <div class=\"lx-button-row\">\n",
+       "      <button class=\"lx-control-btn\" onclick=\"playPause()\">▶️ Play</button>\n",
+       "      <button class=\"lx-control-btn\" onclick=\"prevExtraction()\">⏮ Previous</button>\n",
+       "      <button class=\"lx-control-btn\" onclick=\"nextExtraction()\">⏭ Next</button>\n",
+       "    </div>\n",
+       "    <div class=\"lx-progress-container\">\n",
+       "      <input type=\"range\" id=\"progressSlider\" class=\"lx-progress-slider\"\n",
+       "             min=\"0\" max=\"2\" value=\"0\"\n",
+       "             onchange=\"jumpToExtraction(this.value)\">\n",
+       "    </div>\n",
+       "    <div class=\"lx-status-text\">\n",
+       "      Entity <span id=\"entityInfo\">1/3</span> |\n",
+       "      Pos <span id=\"posInfo\">[0-11]</span>\n",
+       "    </div>\n",
+       "  </div>\n",
+       "</div>\n",
+       "\n",
+       "<script>\n",
+       "  (function() {\n",
+       "    const extractions = [{\"index\": 0, \"class\": \"relationship\", \"text\": \"Lady Juliet and Romeo\", \"color\": \"#FEF0C3\", \"startPos\": 0, \"endPos\": 68, \"beforeText\": \"\", \"extractionText\": \"Lady Juliet gazed longingly at the stars, her heart aching for Romeo\", \"afterText\": \"\", \"attributesHtml\": \"<div><strong>class:</strong> relationship</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">type</span>: <span class=\\\"lx-attr-value\\\">romantic love</span>}</div>\"}, {\"index\": 1, \"class\": \"character\", \"text\": \"Lady Juliet\", \"color\": \"#D2E3FC\", \"startPos\": 0, \"endPos\": 11, \"beforeText\": \"\", \"extractionText\": \"Lady Juliet\", \"afterText\": \" gazed longingly at the stars, her heart aching for Romeo\", \"attributesHtml\": \"<div><strong>class:</strong> character</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">emotional_state</span>: <span class=\\\"lx-attr-value\\\">longing</span>}</div>\"}, {\"index\": 2, \"class\": \"emotion\", \"text\": \"aching\", \"color\": \"#C8E6C9\", \"startPos\": 52, \"endPos\": 58, \"beforeText\": \"Lady Juliet gazed longingly at the stars, her heart \", \"extractionText\": \"aching\", \"afterText\": \" for Romeo\", \"attributesHtml\": \"<div><strong>class:</strong> emotion</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">feeling</span>: <span class=\\\"lx-attr-value\\\">sorrowful desire</span>}</div>\"}];\n",
+       "    let currentIndex = 0;\n",
+       "    let isPlaying = false;\n",
+       "    let animationInterval = null;\n",
+       "    let animationSpeed = 1.0;\n",
+       "\n",
+       "    function updateDisplay() {\n",
+       "      const extraction = extractions[currentIndex];\n",
+       "      if (!extraction) return;\n",
+       "\n",
+       "      document.getElementById('attributesContainer').innerHTML = extraction.attributesHtml;\n",
+       "      document.getElementById('entityInfo').textContent = (currentIndex + 1) + '/' + extractions.length;\n",
+       "      document.getElementById('posInfo').textContent = '[' + extraction.startPos + '-' + extraction.endPos + ']';\n",
+       "      document.getElementById('progressSlider').value = currentIndex;\n",
+       "\n",
+       "      const playBtn = document.querySelector('.lx-control-btn');\n",
+       "      if (playBtn) playBtn.textContent = isPlaying ? '⏸ Pause' : '▶️ Play';\n",
+       "\n",
+       "      const prevHighlight = document.querySelector('.lx-text-window .lx-current-highlight');\n",
+       "      if (prevHighlight) prevHighlight.classList.remove('lx-current-highlight');\n",
+       "      const currentSpan = document.querySelector('.lx-text-window span[data-idx=\"' + currentIndex + '\"]');\n",
+       "      if (currentSpan) {\n",
+       "        currentSpan.classList.add('lx-current-highlight');\n",
+       "        currentSpan.scrollIntoView({block: 'center', behavior: 'smooth'});\n",
+       "      }\n",
+       "    }\n",
+       "\n",
+       "    function nextExtraction() {\n",
+       "      currentIndex = (currentIndex + 1) % extractions.length;\n",
+       "      updateDisplay();\n",
+       "    }\n",
+       "\n",
+       "    function prevExtraction() {\n",
+       "      currentIndex = (currentIndex - 1 + extractions.length) % extractions.length;\n",
+       "      updateDisplay();\n",
+       "    }\n",
+       "\n",
+       "    function jumpToExtraction(index) {\n",
+       "      currentIndex = parseInt(index);\n",
+       "      updateDisplay();\n",
+       "    }\n",
+       "\n",
+       "    function playPause() {\n",
+       "      if (isPlaying) {\n",
+       "        clearInterval(animationInterval);\n",
+       "        isPlaying = false;\n",
+       "      } else {\n",
+       "        animationInterval = setInterval(nextExtraction, animationSpeed * 1000);\n",
+       "        isPlaying = true;\n",
+       "      }\n",
+       "      updateDisplay();\n",
+       "    }\n",
+       "\n",
+       "    window.playPause = playPause;\n",
+       "    window.nextExtraction = nextExtraction;\n",
+       "    window.prevExtraction = prevExtraction;\n",
+       "    window.jumpToExtraction = jumpToExtraction;\n",
+       "\n",
+       "    updateDisplay();\n",
+       "  })();\n",
+       "</script>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "html_content"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "49ec5f64",
+   "metadata": {},
+   "source": [
+    "# My test"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "2314fae3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import langextract as lx\n",
+    "import textwrap"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "2a39a1c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 1. Define the prompt and extraction rules\n",
+    "prompt = textwrap.dedent(\"\"\"\\\n",
+    "    フライトの情報です。データの規則性に従い、データを抽出してください。\n",
+    "    抽出は、データの順序を保ち、言い換えやパラフレーズを避けてください。\n",
+    "    各エンティティには、意味のある属性を追加してコンテキストを提供してください。\n",
+    "    出発日、到着日、フライト名などの情報を抽出してください。その際に日付から考えて、出発、到着の順になるように整合性を確認してください。\"\"\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "15aa1dd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 2. Provide a high-quality example to guide the model\n",
+    "examples = [\n",
+    "    lx.data.ExampleData(\n",
+    "        text=\"[dat]20250801[nam]taro tanaka[age]20[dat]20250803[fr]cx0520\",\n",
+    "        extractions=[\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"departure_date\",\n",
+    "                extraction_text=\"2025/08/01\",\n",
+    "            ),\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"name\",\n",
+    "                extraction_text=\"taro tanaka!\",\n",
+    "            ),\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"arrival_date\",\n",
+    "                extraction_text=\"2025/08/03\",\n",
+    "            ),\n",
+    "            lx.data.Extraction(\n",
+    "                extraction_class=\"flight_name\",\n",
+    "                extraction_text=\"cx0520\",\n",
+    "            ),\n",
+    "        ]\n",
+    "        )]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "82f1b2bf",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m71\u001b[0m chars, processed=\u001b[92m71\u001b[0m chars:  [00:09]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Extraction processing complete\n",
+      "\u001b[92m✓\u001b[0m Extracted \u001b[1m4\u001b[0m entities (\u001b[1m4\u001b[0m unique types)\n",
+      "  \u001b[96m•\u001b[0m Time: \u001b[1m9.18s\u001b[0m\n",
+      "  \u001b[96m•\u001b[0m Speed: \u001b[1m8\u001b[0m chars/sec\n",
+      "  \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# The input text to be processed\n",
+    "input_text = \"[dat]20250804[nam]nakamura john[age]30[dat]20250805[br]cx0009[fr]ar0520\"\n",
+    "input_text =  \"[dat]20250805[dat]20250804[nam]nakamura john[age]30[br]cx0009[fr]ar0520\"\n",
+    "\n",
+    "# Run the extraction\n",
+    "result = lx.extract(\n",
+    "    text_or_documents=input_text,\n",
+    "    prompt_description=prompt,\n",
+    "    examples=examples,\n",
+    "    language_model_type=inference.OllamaLanguageModel,\n",
+    "    model_id=\"gemma2:latest\",\n",
+    "    model_url=\"http://localhost:11434\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "b6d58afe",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Saving to \u001b[92mextraction_results.jsonl\u001b[0m: 1 docs [00:00, 500.10 docs/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Saved \u001b[1m1\u001b[0m documents to \u001b[92mextraction_results.jsonl\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n",
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Loading \u001b[92mextraction_results.jsonl\u001b[0m: 100%|█████████▉| 997/998 [00:00<00:00, 994kB/s]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Loaded \u001b[1m1\u001b[0m documents from \u001b[92mextraction_results.jsonl\u001b[0m\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Save the results to a JSONL file\n",
+    "from pathlib import Path\n",
+    "lx.io.save_annotated_documents([result], output_name=\"extraction_results.jsonl\", output_dir=Path(\".\"))\n",
+    "\n",
+    "# Generate the visualization from the file\n",
+    "html_content = lx.visualize(\"extraction_results.jsonl\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "0d45589e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Extraction(extraction_class='departure_date', extraction_text='2025/08/04', char_interval=None, alignment_status=None, extraction_index=1, group_index=0, description=None, attributes={}),\n",
+       " Extraction(extraction_class='name', extraction_text='nakamura john', char_interval=CharInterval(start_pos=18, end_pos=31), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={}),\n",
+       " Extraction(extraction_class='arrival_date', extraction_text='2025/08/05', char_interval=None, alignment_status=None, extraction_index=3, group_index=2, description=None, attributes={}),\n",
+       " Extraction(extraction_class='flight_name', extraction_text='cx0009', char_interval=CharInterval(start_pos=55, end_pos=61), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'type': 'departure'}),\n",
+       " Extraction(extraction_class='flight_name', extraction_text='ar0520', char_interval=CharInterval(start_pos=65, end_pos=71), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'type': 'arrival'})]"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result.extractions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "34459cac",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Extraction(extraction_class='depature_date', extraction_text='2025/08/05', char_interval=None, alignment_status=None, extraction_index=1, group_index=0, description=None, attributes={}),\n",
+       " Extraction(extraction_class='arrival_date', extraction_text='2025/08/04', char_interval=None, alignment_status=None, extraction_index=2, group_index=1, description=None, attributes={}),\n",
+       " Extraction(extraction_class='name', extraction_text='nakamura john', char_interval=CharInterval(start_pos=31, end_pos=44), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={}),\n",
+       " Extraction(extraction_class='fright_name', extraction_text='cx0009', char_interval=CharInterval(start_pos=55, end_pos=61), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={})]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result.extractions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e2eba844",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "14fbd61f",
+   "metadata": {},
+   "source": [
+    "# 階層テスト"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "id": "bd3dfda7",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m40\u001b[0m chars, processed=\u001b[92m40\u001b[0m chars:  [00:21]"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[92m✓\u001b[0m Extraction processing complete\n",
+      "\u001b[92m✓\u001b[0m Extracted \u001b[1m5\u001b[0m entities (\u001b[1m1\u001b[0m unique types)\n",
+      "  \u001b[96m•\u001b[0m Time: \u001b[1m21.95s\u001b[0m\n",
+      "  \u001b[96m•\u001b[0m Speed: \u001b[1m2\u001b[0m chars/sec\n",
+      "  \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n",
+      "AnnotatedDocument(extractions=[Extraction(extraction_class='heading', extraction_text='第2章：分析', char_interval=CharInterval(start_pos=0, end_pos=6), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'level': 1, 'children': ['2.1 データ', '2.2 結果']}), Extraction(extraction_class='heading', extraction_text='2.1 データ', char_interval=CharInterval(start_pos=7, end_pos=14), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'level': 2, 'children': ['2.1.1 収集', '2.1.2 前処理']}), Extraction(extraction_class='heading', extraction_text='2.1.1 収集', char_interval=CharInterval(start_pos=15, end_pos=23), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={'level': 3, 'children': []}), Extraction(extraction_class='heading', extraction_text='2.1.2 前処理', char_interval=CharInterval(start_pos=24, end_pos=33), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'level': 3, 'children': []}), Extraction(extraction_class='heading', extraction_text='2.2 結果', char_interval=CharInterval(start_pos=34, end_pos=40), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'level': 2, 'children': []})], text='第2章：分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果')\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "import langextract as lx\n",
+    "import textwrap\n",
+    "\n",
+    "prompt = textwrap.dedent(\"\"\"\\\n",
+    "以下の文章から「見出し階層」を抽出してください。\n",
+    "各階層は JSON にネストされた children リストで表現します。\n",
+    "出力の構造を見本にならって厳密に守ってください。\"\"\")\n",
+    "\n",
+    "# Concrete examples of the tree structure\n",
+    "examples = [\n",
+    "    # Single level\n",
+    "    lx.data.ExampleData(\n",
+    "      text=\"第1章：概要\",\n",
+    "      extractions=[\n",
+    "        lx.data.Extraction(\n",
+    "          extraction_class=\"heading\",\n",
+    "          extraction_text=\"第1章：概要\",\n",
+    "          attributes={\"level\": 1, \"children\": []}\n",
+    "        )\n",
+    "      ]\n",
+    "    ),\n",
+    "    # Two levels\n",
+    "    lx.data.ExampleData(\n",
+    "      text=\"第1章：概要\\n1.1 背景\\n1.2 目的\",\n",
+    "      extractions=[\n",
+    "        lx.data.Extraction(\"heading\",\n",
+    "                           \"第1章：概要\",\n",
+    "                          attributes={\"level\": 1, \"children\": [\"1.1 背景\", \"1.2 目的\"]}),\n",
+    "        lx.data.Extraction(\"heading\", \"1.1 背景\",  attributes={\"level\": 2, \"children\": []}),\n",
+    "        lx.data.Extraction(\"heading\", \"1.2 目的\",  attributes={\"level\": 2, \"children\": []}),\n",
+    "      ]\n",
+    "    ),\n",
+    "    # Three levels, including headings with no children\n",
+    "    lx.data.ExampleData(\n",
+    "      text=\"第2章：分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果\",\n",
+    "      extractions=[\n",
+    "        lx.data.Extraction(\"heading\", \"第2章：分析\",  attributes={\"level\": 1, \"children\": [\"2.1 データ\", \"2.2 結果\"]}),\n",
+    "        lx.data.Extraction(\"heading\", \"2.1 データ\",  attributes={\"level\": 2, \"children\": [\"2.1.1 収集\", \"2.1.2 前処理\"]}),\n",
+    "        lx.data.Extraction(\"heading\", \"2.1.1 収集\",  attributes={\"level\": 3, \"children\": []}),\n",
+    "        lx.data.Extraction(\"heading\", \"2.1.2 前処理\",  attributes={\"level\": 3, \"children\": []}),\n",
+    "        lx.data.Extraction(\"heading\", \"2.2 結果\",  attributes={\"level\": 2, \"children\": []}),\n",
+    "      ]\n",
+    "    )\n",
+    "]\n",
+    "\n",
+    "result = lx.extract(\n",
+    "  text_or_documents=\"第2章：分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果\",\n",
+    "  prompt_description=prompt,\n",
+    "  examples=examples,\n",
+    "  language_model_type=inference.OllamaLanguageModel,\n",
+    "  model_id=\"gemma2:latest\",\n",
+    "  model_url=\"http://localhost:11434\"\n",
+    ")\n",
+    "\n",
+    "print(result)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "id": "d355c87a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "[Extraction(extraction_class='heading', extraction_text='第2章：分析', char_interval=CharInterval(start_pos=0, end_pos=6), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'level': 1, 'children': ['2.1 データ', '2.2 結果']}),\n",
+       " Extraction(extraction_class='heading', extraction_text='2.1 データ', char_interval=CharInterval(start_pos=7, end_pos=14), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'level': 2, 'children': ['2.1.1 収集', '2.1.2 前処理']}),\n",
+       " Extraction(extraction_class='heading', extraction_text='2.1.1 収集', char_interval=CharInterval(start_pos=15, end_pos=23), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={'level': 3, 'children': []}),\n",
+       " Extraction(extraction_class='heading', extraction_text='2.1.2 前処理', char_interval=CharInterval(start_pos=24, end_pos=33), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'level': 3, 'children': []}),\n",
+       " Extraction(extraction_class='heading', extraction_text='2.2 結果', char_interval=CharInterval(start_pos=34, end_pos=40), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'level': 2, 'children': []})]"
+      ]
+     },
+     "execution_count": 45,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "result.extractions"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}

20250803_langextract/visualization.html ADDED Viewed

	@@ -0,0 +1,191 @@

+<style>
+.lx-highlight { position: relative; border-radius:3px; padding:1px 2px;}
+.lx-highlight .lx-tooltip {
+  visibility: hidden;
+  opacity: 0;
+  transition: opacity 0.2s ease-in-out;
+  background: #333;
+  color: #fff;
+  text-align: left;
+  border-radius: 4px;
+  padding: 6px 8px;
+  position: absolute;
+  z-index: 1000;
+  bottom: 125%;
+  left: 50%;
+  transform: translateX(-50%);
+  font-size: 12px;
+  max-width: 240px;
+  white-space: normal;
+  box-shadow: 0 2px 6px rgba(0,0,0,0.3);
+}
+.lx-highlight:hover .lx-tooltip { visibility: visible; opacity:1; }
+.lx-animated-wrapper { max-width: 100%; font-family: Arial, sans-serif; }
+.lx-controls {
+  background: #fafafa; border: 1px solid #90caf9; border-radius: 8px;
+  padding: 12px; margin-bottom: 16px;
+}
+.lx-button-row {
+  display: flex; justify-content: center; gap: 8px; margin-bottom: 12px;
+}
+.lx-control-btn {
+  background: #4285f4; color: white; border: none; border-radius: 4px;
+  padding: 8px 16px; cursor: pointer; font-size: 13px; font-weight: 500;
+  transition: background-color 0.2s;
+}
+.lx-control-btn:hover { background: #3367d6; }
+.lx-progress-container {
+  margin-bottom: 8px;
+}
+.lx-progress-slider {
+  width: 100%; margin: 0; appearance: none; height: 6px;
+  background: #ddd; border-radius: 3px; outline: none;
+}
+.lx-progress-slider::-webkit-slider-thumb {
+  appearance: none; width: 18px; height: 18px; background: #4285f4;
+  border-radius: 50%; cursor: pointer;
+}
+.lx-progress-slider::-moz-range-thumb {
+  width: 18px; height: 18px; background: #4285f4; border-radius: 50%;
+  cursor: pointer; border: none;
+}
+.lx-status-text {
+  text-align: center; font-size: 12px; color: #666; margin-top: 4px;
+}
+.lx-text-window {
+  font-family: monospace; white-space: pre-wrap; border: 1px solid #90caf9;
+  padding: 12px; max-height: 260px; overflow-y: auto; margin-bottom: 12px;
+  line-height: 1.6;
+}
+.lx-attributes-panel {
+  background: #fafafa; border: 1px solid #90caf9; border-radius: 6px;
+  padding: 8px 10px; margin-top: 8px; font-size: 13px;
+}
+.lx-current-highlight {
+  text-decoration: underline;
+  text-decoration-color: #ff4444;
+  text-decoration-thickness: 3px;
+  font-weight: bold;
+  animation: lx-pulse 1s ease-in-out;
+}
+@keyframes lx-pulse {
+  0% { text-decoration-color: #ff4444; }
+  50% { text-decoration-color: #ff0000; }
+  100% { text-decoration-color: #ff4444; }
+}
+.lx-legend {
+  font-size: 12px; margin-bottom: 8px;
+  padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;
+}
+.lx-label {
+  display: inline-block;
+  padding: 2px 4px;
+  border-radius: 3px;
+  margin-right: 4px;
+  color: #000;
+}
+.lx-attr-key {
+  font-weight: 600;
+  color: #1565c0;
+  letter-spacing: 0.3px;
+}
+.lx-attr-value {
+  font-weight: 400;
+  opacity: 0.85;
+  letter-spacing: 0.2px;
+}
+/* Add optimizations with larger fonts and better readability for GIFs */
+.lx-gif-optimized .lx-text-window { font-size: 16px; line-height: 1.8; }
+.lx-gif-optimized .lx-attributes-panel { font-size: 15px; }
+.lx-gif-optimized .lx-current-highlight { text-decoration-thickness: 4px; }
+</style>
+<div class="lx-animated-wrapper lx-gif-optimized">
+  <div class="lx-attributes-panel">
+    <div class="lx-legend">Highlights Legend: <span class="lx-label" style="background-color:#D2E3FC;">character</span> <span class="lx-label" style="background-color:#C8E6C9;">emotion</span> <span class="lx-label" style="background-color:#FEF0C3;">relationship</span></div>
+    <div id="attributesContainer"></div>
+  </div>
+  <div class="lx-text-window" id="textWindow">
+    <span class="lx-highlight lx-current-highlight" data-idx="0" style="background-color:#FEF0C3;"><span class="lx-highlight" data-idx="1" style="background-color:#D2E3FC;">Lady Juliet</span> gazed longingly at the stars, her heart <span class="lx-highlight" data-idx="2" style="background-color:#C8E6C9;">aching</span> for Romeo</span>
+  </div>
+  <div class="lx-controls">
+    <div class="lx-button-row">
+      <button class="lx-control-btn" onclick="playPause()">▶️ Play</button>
+      <button class="lx-control-btn" onclick="prevExtraction()">⏮ Previous</button>
+      <button class="lx-control-btn" onclick="nextExtraction()">⏭ Next</button>
+    </div>
+    <div class="lx-progress-container">
+      <input type="range" id="progressSlider" class="lx-progress-slider"
+             min="0" max="2" value="0"
+             onchange="jumpToExtraction(this.value)">
+    </div>
+    <div class="lx-status-text">
+      Entity <span id="entityInfo">1/3</span> |
+      Pos <span id="posInfo">[0-11]</span>
+    </div>
+  </div>
+</div>
+<script>
+  (function() {
+    const extractions = [{"index": 0, "class": "relationship", "text": "Lady Juliet and Romeo", "color": "#FEF0C3", "startPos": 0, "endPos": 68, "beforeText": "", "extractionText": "Lady Juliet gazed longingly at the stars, her heart aching for Romeo", "afterText": "", "attributesHtml": "<div><strong>class:</strong> relationship</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">type</span>: <span class=\"lx-attr-value\">romantic love</span>}</div>"}, {"index": 1, "class": "character", "text": "Lady Juliet", "color": "#D2E3FC", "startPos": 0, "endPos": 11, "beforeText": "", "extractionText": "Lady Juliet", "afterText": " gazed longingly at the stars, her heart aching for Romeo", "attributesHtml": "<div><strong>class:</strong> character</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">emotional_state</span>: <span class=\"lx-attr-value\">longing</span>}</div>"}, {"index": 2, "class": "emotion", "text": "aching", "color": "#C8E6C9", "startPos": 52, "endPos": 58, "beforeText": "Lady Juliet gazed longingly at the stars, her heart ", "extractionText": "aching", "afterText": " for Romeo", "attributesHtml": "<div><strong>class:</strong> emotion</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">feeling</span>: <span class=\"lx-attr-value\">sorrowful desire</span>}</div>"}];
+    let currentIndex = 0; // position in `extractions` of the entry currently shown
+    let isPlaying = false; // true while the auto-advance timer is running
+    let animationInterval = null; // handle returned by setInterval in playPause()
+    let animationSpeed = 1.0; // seconds between automatic steps (multiplied by 1000 for setInterval)
+    function updateDisplay() {
+      const extraction = extractions[currentIndex];
+      if (!extraction) return;
+      document.getElementById('attributesContainer').innerHTML = extraction.attributesHtml;
+      document.getElementById('entityInfo').textContent = (currentIndex + 1) + '/' + extractions.length;
+      document.getElementById('posInfo').textContent = '[' + extraction.startPos + '-' + extraction.endPos + ']';
+      document.getElementById('progressSlider').value = currentIndex;
+      const playBtn = document.querySelector('.lx-control-btn');
+      if (playBtn) playBtn.textContent = isPlaying ? '⏸ Pause' : '▶️ Play';
+      const prevHighlight = document.querySelector('.lx-text-window .lx-current-highlight');
+      if (prevHighlight) prevHighlight.classList.remove('lx-current-highlight');
+      const currentSpan = document.querySelector('.lx-text-window span[data-idx="' + currentIndex + '"]');
+      if (currentSpan) {
+        currentSpan.classList.add('lx-current-highlight');
+        currentSpan.scrollIntoView({block: 'center', behavior: 'smooth'});
+      }
+    }
+    function nextExtraction() {
+      currentIndex = (currentIndex + 1) % extractions.length;
+      updateDisplay();
+    }
+    function prevExtraction() {
+      currentIndex = (currentIndex - 1 + extractions.length) % extractions.length;
+      updateDisplay();
+    }
+    function jumpToExtraction(index) {
+      currentIndex = parseInt(index);
+      updateDisplay();
+    }
+    function playPause() {
+      if (isPlaying) {
+        clearInterval(animationInterval);
+        isPlaying = false;
+      } else {
+        animationInterval = setInterval(nextExtraction, animationSpeed * 1000);
+        isPlaying = true;
+      }
+      updateDisplay();
+    }
+    window.playPause = playPause; // exposed globally: the inline onclick/onchange attributes in the markup call these functions by name
+    window.nextExtraction = nextExtraction;
+    window.prevExtraction = prevExtraction;
+    window.jumpToExtraction = jumpToExtraction;
+    updateDisplay(); // initial render for currentIndex = 0
+  })();
+</script>