Okoge-keys committed on
Commit
aad43b4
·
verified ·
1 Parent(s): 5ce3c32

Upload 3 files

Browse files
20250803_langextract/extraction_results.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"extractions": [{"extraction_class": "depature_date", "extraction_text": "2025/08/05", "char_interval": null, "alignment_status": null, "extraction_index": 1, "group_index": 0, "description": null, "attributes": {}}, {"extraction_class": "arrival_date", "extraction_text": "2025/08/04", "char_interval": null, "alignment_status": null, "extraction_index": 2, "group_index": 1, "description": null, "attributes": {}}, {"extraction_class": "name", "extraction_text": "nakamura john", "char_interval": {"start_pos": 31, "end_pos": 44}, "alignment_status": "match_exact", "extraction_index": 3, "group_index": 2, "description": null, "attributes": {}}, {"extraction_class": "fright_name", "extraction_text": "cx0009", "char_interval": {"start_pos": 55, "end_pos": 61}, "alignment_status": "match_exact", "extraction_index": 4, "group_index": 3, "description": null, "attributes": {}}], "text": "[dat]20250805[dat]20250804[nam]nakamura john[age]30[br]cx0009[fr]ar0520", "document_id": "doc_c6b4f79c"}
20250803_langextract/test.ipynb ADDED
@@ -0,0 +1,809 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "3bf0e2df",
6
+ "metadata": {},
7
+ "source": [
8
+ "# sample test"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": 1,
14
+ "id": "eb638e6d",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import langextract as lx\n",
19
+ "import textwrap\n",
20
+ "from langextract import inference\n",
21
+ "\n",
22
+ "# 1. Define the prompt and extraction rules\n",
23
+ "prompt = textwrap.dedent(\"\"\"\\\n",
24
+ " Extract characters, emotions, and relationships in order of appearance.\n",
25
+ " Use exact text for extractions. Do not paraphrase or overlap entities.\n",
26
+ " Provide meaningful attributes for each entity to add context.\"\"\")\n",
27
+ "\n",
28
+ "# 2. Provide a high-quality example to guide the model\n",
29
+ "examples = [\n",
30
+ " lx.data.ExampleData(\n",
31
+ " text=\"ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.\",\n",
32
+ " extractions=[\n",
33
+ " lx.data.Extraction(\n",
34
+ " extraction_class=\"character\",\n",
35
+ " extraction_text=\"ROMEO\",\n",
36
+ " attributes={\"emotional_state\": \"wonder\"}\n",
37
+ " ),\n",
38
+ " lx.data.Extraction(\n",
39
+ " extraction_class=\"emotion\",\n",
40
+ " extraction_text=\"But soft!\",\n",
41
+ " attributes={\"feeling\": \"gentle awe\"}\n",
42
+ " ),\n",
43
+ " lx.data.Extraction(\n",
44
+ " extraction_class=\"relationship\",\n",
45
+ " extraction_text=\"Juliet is the sun\",\n",
46
+ " attributes={\"type\": \"metaphor\"}\n",
47
+ " ),\n",
48
+ " ]\n",
49
+ " )\n",
50
+ "]"
51
+ ]
52
+ },
53
+ {
54
+ "cell_type": "code",
55
+ "execution_count": 2,
56
+ "id": "c72822d9",
57
+ "metadata": {},
58
+ "outputs": [
59
+ {
60
+ "name": "stderr",
61
+ "output_type": "stream",
62
+ "text": [
63
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m68\u001b[0m chars, processed=\u001b[92m68\u001b[0m chars: [00:11]"
64
+ ]
65
+ },
66
+ {
67
+ "name": "stdout",
68
+ "output_type": "stream",
69
+ "text": [
70
+ "\u001b[92m✓\u001b[0m Extraction processing complete\n",
71
+ "\u001b[92m✓\u001b[0m Extracted \u001b[1m3\u001b[0m entities (\u001b[1m3\u001b[0m unique types)\n",
72
+ " \u001b[96m•\u001b[0m Time: \u001b[1m11.09s\u001b[0m\n",
73
+ " \u001b[96m•\u001b[0m Speed: \u001b[1m6\u001b[0m chars/sec\n",
74
+ " \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n"
75
+ ]
76
+ },
77
+ {
78
+ "name": "stderr",
79
+ "output_type": "stream",
80
+ "text": [
81
+ "\n"
82
+ ]
83
+ }
84
+ ],
85
+ "source": [
86
+ "# The input text to be processed\n",
87
+ "input_text = \"Lady Juliet gazed longingly at the stars, her heart aching for Romeo\"\n",
88
+ "\n",
89
+ "# Run the extraction\n",
90
+ "result = lx.extract(\n",
91
+ " text_or_documents=input_text,\n",
92
+ " prompt_description=prompt,\n",
93
+ " examples=examples,\n",
94
+ " language_model_type=inference.OllamaLanguageModel,\n",
95
+ " model_id=\"gemma2:latest\",\n",
96
+ " model_url=\"http://localhost:11434\"\n",
97
+ ")"
98
+ ]
99
+ },
100
+ {
101
+ "cell_type": "code",
102
+ "execution_count": 9,
103
+ "id": "a0c64fc9",
104
+ "metadata": {},
105
+ "outputs": [
106
+ {
107
+ "name": "stdout",
108
+ "output_type": "stream",
109
+ "text": [
110
+ "\u001b[31mType:\u001b[39m AnnotatedDocument\n",
111
+ "\u001b[31mString form:\u001b[39m AnnotatedDocument(extractions=[Extraction(extraction_class='character', extraction_text='Lady Jul <...> ={'type': 'love'})], text='Lady Juliet gazed longingly at the stars, her heart aching for Romeo')\n",
112
+ "\u001b[31mFile:\u001b[39m c:\\users\\kenta\\appdata\\local\\programs\\python\\python312\\lib\\site-packages\\langextract\\data.py\n",
113
+ "\u001b[31mDocstring:\u001b[39m \n",
114
+ "Class for representing annotated documents.\n",
115
+ "\n",
116
+ "Attributes:\n",
117
+ " document_id: Unique identifier for each document - autogenerated if not\n",
118
+ " set.\n",
119
+ " extractions: List of extractions in the document.\n",
120
+ " text: Raw text representation of the document.\n",
121
+ " tokenized_text: Tokenized text of the document, computed from `text`."
122
+ ]
123
+ }
124
+ ],
125
+ "source": [
126
+ "?result"
127
+ ]
128
+ },
129
+ {
130
+ "cell_type": "code",
131
+ "execution_count": 12,
132
+ "id": "af83d97e",
133
+ "metadata": {},
134
+ "outputs": [
135
+ {
136
+ "data": {
137
+ "text/plain": [
138
+ "[Extraction(extraction_class='character', extraction_text='Lady Juliet', char_interval=CharInterval(start_pos=0, end_pos=11), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'emotional_state': 'longing'}),\n",
139
+ " Extraction(extraction_class='emotion', extraction_text='aching', char_interval=CharInterval(start_pos=52, end_pos=58), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'feeling': 'sorrow'}),\n",
140
+ " Extraction(extraction_class='relationship', extraction_text='Lady Juliet... for Romeo', char_interval=CharInterval(start_pos=0, end_pos=68), alignment_status=<AlignmentStatus.MATCH_FUZZY: 'match_fuzzy'>, extraction_index=3, group_index=2, description=None, attributes={'type': 'love'})]"
141
+ ]
142
+ },
143
+ "execution_count": 12,
144
+ "metadata": {},
145
+ "output_type": "execute_result"
146
+ }
147
+ ],
148
+ "source": [
149
+ "result.extractions"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "id": "aadaf861",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": []
159
+ },
160
+ {
161
+ "cell_type": "code",
162
+ "execution_count": 10,
163
+ "id": "3622840e",
164
+ "metadata": {},
165
+ "outputs": [
166
+ {
167
+ "name": "stderr",
168
+ "output_type": "stream",
169
+ "text": [
170
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Saving to \u001b[92mextraction_results.jsonl\u001b[0m: 1 docs [00:00, 501.95 docs/s]"
171
+ ]
172
+ },
173
+ {
174
+ "name": "stdout",
175
+ "output_type": "stream",
176
+ "text": [
177
+ "\u001b[92m✓\u001b[0m Saved \u001b[1m1\u001b[0m documents to \u001b[92mextraction_results.jsonl\u001b[0m\n"
178
+ ]
179
+ },
180
+ {
181
+ "name": "stderr",
182
+ "output_type": "stream",
183
+ "text": [
184
+ "\n",
185
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Loading \u001b[92mextraction_results.jsonl\u001b[0m: 100%|█████████▉| 918/919 [00:00<00:00, 230kB/s]"
186
+ ]
187
+ },
188
+ {
189
+ "name": "stdout",
190
+ "output_type": "stream",
191
+ "text": [
192
+ "\u001b[92m✓\u001b[0m Loaded \u001b[1m1\u001b[0m documents from \u001b[92mextraction_results.jsonl\u001b[0m\n"
193
+ ]
194
+ },
195
+ {
196
+ "name": "stderr",
197
+ "output_type": "stream",
198
+ "text": [
199
+ "\n"
200
+ ]
201
+ },
202
+ {
203
+ "data": {
204
+ "text/plain": [
205
+ "7947"
206
+ ]
207
+ },
208
+ "execution_count": 10,
209
+ "metadata": {},
210
+ "output_type": "execute_result"
211
+ }
212
+ ],
213
+ "source": [
214
+ "# Save the results to a JSONL file\n",
215
+ "from pathlib import Path\n",
216
+ "lx.io.save_annotated_documents([result], output_name=\"extraction_results.jsonl\", output_dir=Path(\".\"))\n",
217
+ "\n",
218
+ "# Generate the visualization from the file\n",
219
+ "html_content = lx.visualize(\"extraction_results.jsonl\")\n",
220
+ "# HTML 本体文字列を取得してファイル化\n",
221
+ "html_str: str = html_content.data # HTML 文字列が .data に格納されている\n",
222
+ "output_path = Path(\"visualization.html\")\n",
223
+ "output_path.write_text(html_str, encoding=\"utf-8\")"
224
+ ]
225
+ },
226
+ {
227
+ "cell_type": "code",
228
+ "execution_count": 8,
229
+ "id": "16c245f1",
230
+ "metadata": {},
231
+ "outputs": [
232
+ {
233
+ "data": {
234
+ "text/html": [
235
+ "<style>\n",
236
+ ".lx-highlight { position: relative; border-radius:3px; padding:1px 2px;}\n",
237
+ ".lx-highlight .lx-tooltip {\n",
238
+ " visibility: hidden;\n",
239
+ " opacity: 0;\n",
240
+ " transition: opacity 0.2s ease-in-out;\n",
241
+ " background: #333;\n",
242
+ " color: #fff;\n",
243
+ " text-align: left;\n",
244
+ " border-radius: 4px;\n",
245
+ " padding: 6px 8px;\n",
246
+ " position: absolute;\n",
247
+ " z-index: 1000;\n",
248
+ " bottom: 125%;\n",
249
+ " left: 50%;\n",
250
+ " transform: translateX(-50%);\n",
251
+ " font-size: 12px;\n",
252
+ " max-width: 240px;\n",
253
+ " white-space: normal;\n",
254
+ " box-shadow: 0 2px 6px rgba(0,0,0,0.3);\n",
255
+ "}\n",
256
+ ".lx-highlight:hover .lx-tooltip { visibility: visible; opacity:1; }\n",
257
+ ".lx-animated-wrapper { max-width: 100%; font-family: Arial, sans-serif; }\n",
258
+ ".lx-controls {\n",
259
+ " background: #fafafa; border: 1px solid #90caf9; border-radius: 8px;\n",
260
+ " padding: 12px; margin-bottom: 16px;\n",
261
+ "}\n",
262
+ ".lx-button-row {\n",
263
+ " display: flex; justify-content: center; gap: 8px; margin-bottom: 12px;\n",
264
+ "}\n",
265
+ ".lx-control-btn {\n",
266
+ " background: #4285f4; color: white; border: none; border-radius: 4px;\n",
267
+ " padding: 8px 16px; cursor: pointer; font-size: 13px; font-weight: 500;\n",
268
+ " transition: background-color 0.2s;\n",
269
+ "}\n",
270
+ ".lx-control-btn:hover { background: #3367d6; }\n",
271
+ ".lx-progress-container {\n",
272
+ " margin-bottom: 8px;\n",
273
+ "}\n",
274
+ ".lx-progress-slider {\n",
275
+ " width: 100%; margin: 0; appearance: none; height: 6px;\n",
276
+ " background: #ddd; border-radius: 3px; outline: none;\n",
277
+ "}\n",
278
+ ".lx-progress-slider::-webkit-slider-thumb {\n",
279
+ " appearance: none; width: 18px; height: 18px; background: #4285f4;\n",
280
+ " border-radius: 50%; cursor: pointer;\n",
281
+ "}\n",
282
+ ".lx-progress-slider::-moz-range-thumb {\n",
283
+ " width: 18px; height: 18px; background: #4285f4; border-radius: 50%;\n",
284
+ " cursor: pointer; border: none;\n",
285
+ "}\n",
286
+ ".lx-status-text {\n",
287
+ " text-align: center; font-size: 12px; color: #666; margin-top: 4px;\n",
288
+ "}\n",
289
+ ".lx-text-window {\n",
290
+ " font-family: monospace; white-space: pre-wrap; border: 1px solid #90caf9;\n",
291
+ " padding: 12px; max-height: 260px; overflow-y: auto; margin-bottom: 12px;\n",
292
+ " line-height: 1.6;\n",
293
+ "}\n",
294
+ ".lx-attributes-panel {\n",
295
+ " background: #fafafa; border: 1px solid #90caf9; border-radius: 6px;\n",
296
+ " padding: 8px 10px; margin-top: 8px; font-size: 13px;\n",
297
+ "}\n",
298
+ ".lx-current-highlight {\n",
299
+ " text-decoration: underline;\n",
300
+ " text-decoration-color: #ff4444;\n",
301
+ " text-decoration-thickness: 3px;\n",
302
+ " font-weight: bold;\n",
303
+ " animation: lx-pulse 1s ease-in-out;\n",
304
+ "}\n",
305
+ "@keyframes lx-pulse {\n",
306
+ " 0% { text-decoration-color: #ff4444; }\n",
307
+ " 50% { text-decoration-color: #ff0000; }\n",
308
+ " 100% { text-decoration-color: #ff4444; }\n",
309
+ "}\n",
310
+ ".lx-legend {\n",
311
+ " font-size: 12px; margin-bottom: 8px;\n",
312
+ " padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;\n",
313
+ "}\n",
314
+ ".lx-label {\n",
315
+ " display: inline-block;\n",
316
+ " padding: 2px 4px;\n",
317
+ " border-radius: 3px;\n",
318
+ " margin-right: 4px;\n",
319
+ " color: #000;\n",
320
+ "}\n",
321
+ ".lx-attr-key {\n",
322
+ " font-weight: 600;\n",
323
+ " color: #1565c0;\n",
324
+ " letter-spacing: 0.3px;\n",
325
+ "}\n",
326
+ ".lx-attr-value {\n",
327
+ " font-weight: 400;\n",
328
+ " opacity: 0.85;\n",
329
+ " letter-spacing: 0.2px;\n",
330
+ "}\n",
331
+ "\n",
332
+ "/* Add optimizations with larger fonts and better readability for GIFs */\n",
333
+ ".lx-gif-optimized .lx-text-window { font-size: 16px; line-height: 1.8; }\n",
334
+ ".lx-gif-optimized .lx-attributes-panel { font-size: 15px; }\n",
335
+ ".lx-gif-optimized .lx-current-highlight { text-decoration-thickness: 4px; }\n",
336
+ "</style>\n",
337
+ "<div class=\"lx-animated-wrapper lx-gif-optimized\">\n",
338
+ " <div class=\"lx-attributes-panel\">\n",
339
+ " <div class=\"lx-legend\">Highlights Legend: <span class=\"lx-label\" style=\"background-color:#D2E3FC;\">character</span> <span class=\"lx-label\" style=\"background-color:#C8E6C9;\">emotion</span> <span class=\"lx-label\" style=\"background-color:#FEF0C3;\">relationship</span></div>\n",
340
+ " <div id=\"attributesContainer\"></div>\n",
341
+ " </div>\n",
342
+ " <div class=\"lx-text-window\" id=\"textWindow\">\n",
343
+ " <span class=\"lx-highlight lx-current-highlight\" data-idx=\"0\" style=\"background-color:#FEF0C3;\"><span class=\"lx-highlight\" data-idx=\"1\" style=\"background-color:#D2E3FC;\">Lady Juliet</span> gazed longingly at the stars, her heart <span class=\"lx-highlight\" data-idx=\"2\" style=\"background-color:#C8E6C9;\">aching</span> for Romeo</span>\n",
344
+ " </div>\n",
345
+ " <div class=\"lx-controls\">\n",
346
+ " <div class=\"lx-button-row\">\n",
347
+ " <button class=\"lx-control-btn\" onclick=\"playPause()\">▶️ Play</button>\n",
348
+ " <button class=\"lx-control-btn\" onclick=\"prevExtraction()\">⏮ Previous</button>\n",
349
+ " <button class=\"lx-control-btn\" onclick=\"nextExtraction()\">⏭ Next</button>\n",
350
+ " </div>\n",
351
+ " <div class=\"lx-progress-container\">\n",
352
+ " <input type=\"range\" id=\"progressSlider\" class=\"lx-progress-slider\"\n",
353
+ " min=\"0\" max=\"2\" value=\"0\"\n",
354
+ " onchange=\"jumpToExtraction(this.value)\">\n",
355
+ " </div>\n",
356
+ " <div class=\"lx-status-text\">\n",
357
+ " Entity <span id=\"entityInfo\">1/3</span> |\n",
358
+ " Pos <span id=\"posInfo\">[0-11]</span>\n",
359
+ " </div>\n",
360
+ " </div>\n",
361
+ "</div>\n",
362
+ "\n",
363
+ "<script>\n",
364
+ " (function() {\n",
365
+ " const extractions = [{\"index\": 0, \"class\": \"relationship\", \"text\": \"Lady Juliet and Romeo\", \"color\": \"#FEF0C3\", \"startPos\": 0, \"endPos\": 68, \"beforeText\": \"\", \"extractionText\": \"Lady Juliet gazed longingly at the stars, her heart aching for Romeo\", \"afterText\": \"\", \"attributesHtml\": \"<div><strong>class:</strong> relationship</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">type</span>: <span class=\\\"lx-attr-value\\\">romantic love</span>}</div>\"}, {\"index\": 1, \"class\": \"character\", \"text\": \"Lady Juliet\", \"color\": \"#D2E3FC\", \"startPos\": 0, \"endPos\": 11, \"beforeText\": \"\", \"extractionText\": \"Lady Juliet\", \"afterText\": \" gazed longingly at the stars, her heart aching for Romeo\", \"attributesHtml\": \"<div><strong>class:</strong> character</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">emotional_state</span>: <span class=\\\"lx-attr-value\\\">longing</span>}</div>\"}, {\"index\": 2, \"class\": \"emotion\", \"text\": \"aching\", \"color\": \"#C8E6C9\", \"startPos\": 52, \"endPos\": 58, \"beforeText\": \"Lady Juliet gazed longingly at the stars, her heart \", \"extractionText\": \"aching\", \"afterText\": \" for Romeo\", \"attributesHtml\": \"<div><strong>class:</strong> emotion</div><div><strong>attributes:</strong> {<span class=\\\"lx-attr-key\\\">feeling</span>: <span class=\\\"lx-attr-value\\\">sorrowful desire</span>}</div>\"}];\n",
366
+ " let currentIndex = 0;\n",
367
+ " let isPlaying = false;\n",
368
+ " let animationInterval = null;\n",
369
+ " let animationSpeed = 1.0;\n",
370
+ "\n",
371
+ " function updateDisplay() {\n",
372
+ " const extraction = extractions[currentIndex];\n",
373
+ " if (!extraction) return;\n",
374
+ "\n",
375
+ " document.getElementById('attributesContainer').innerHTML = extraction.attributesHtml;\n",
376
+ " document.getElementById('entityInfo').textContent = (currentIndex + 1) + '/' + extractions.length;\n",
377
+ " document.getElementById('posInfo').textContent = '[' + extraction.startPos + '-' + extraction.endPos + ']';\n",
378
+ " document.getElementById('progressSlider').value = currentIndex;\n",
379
+ "\n",
380
+ " const playBtn = document.querySelector('.lx-control-btn');\n",
381
+ " if (playBtn) playBtn.textContent = isPlaying ? '⏸ Pause' : '▶️ Play';\n",
382
+ "\n",
383
+ " const prevHighlight = document.querySelector('.lx-text-window .lx-current-highlight');\n",
384
+ " if (prevHighlight) prevHighlight.classList.remove('lx-current-highlight');\n",
385
+ " const currentSpan = document.querySelector('.lx-text-window span[data-idx=\"' + currentIndex + '\"]');\n",
386
+ " if (currentSpan) {\n",
387
+ " currentSpan.classList.add('lx-current-highlight');\n",
388
+ " currentSpan.scrollIntoView({block: 'center', behavior: 'smooth'});\n",
389
+ " }\n",
390
+ " }\n",
391
+ "\n",
392
+ " function nextExtraction() {\n",
393
+ " currentIndex = (currentIndex + 1) % extractions.length;\n",
394
+ " updateDisplay();\n",
395
+ " }\n",
396
+ "\n",
397
+ " function prevExtraction() {\n",
398
+ " currentIndex = (currentIndex - 1 + extractions.length) % extractions.length;\n",
399
+ " updateDisplay();\n",
400
+ " }\n",
401
+ "\n",
402
+ " function jumpToExtraction(index) {\n",
403
+ " currentIndex = parseInt(index);\n",
404
+ " updateDisplay();\n",
405
+ " }\n",
406
+ "\n",
407
+ " function playPause() {\n",
408
+ " if (isPlaying) {\n",
409
+ " clearInterval(animationInterval);\n",
410
+ " isPlaying = false;\n",
411
+ " } else {\n",
412
+ " animationInterval = setInterval(nextExtraction, animationSpeed * 1000);\n",
413
+ " isPlaying = true;\n",
414
+ " }\n",
415
+ " updateDisplay();\n",
416
+ " }\n",
417
+ "\n",
418
+ " window.playPause = playPause;\n",
419
+ " window.nextExtraction = nextExtraction;\n",
420
+ " window.prevExtraction = prevExtraction;\n",
421
+ " window.jumpToExtraction = jumpToExtraction;\n",
422
+ "\n",
423
+ " updateDisplay();\n",
424
+ " })();\n",
425
+ "</script>"
426
+ ],
427
+ "text/plain": [
428
+ "<IPython.core.display.HTML object>"
429
+ ]
430
+ },
431
+ "execution_count": 8,
432
+ "metadata": {},
433
+ "output_type": "execute_result"
434
+ }
435
+ ],
436
+ "source": [
437
+ "html_content"
438
+ ]
439
+ },
440
+ {
441
+ "cell_type": "markdown",
442
+ "id": "49ec5f64",
443
+ "metadata": {},
444
+ "source": [
445
+ "# My test"
446
+ ]
447
+ },
448
+ {
449
+ "cell_type": "code",
450
+ "execution_count": 14,
451
+ "id": "2314fae3",
452
+ "metadata": {},
453
+ "outputs": [],
454
+ "source": [
455
+ "import langextract as lx\n",
456
+ "import textwrap"
457
+ ]
458
+ },
459
+ {
460
+ "cell_type": "code",
461
+ "execution_count": 36,
462
+ "id": "2a39a1c0",
463
+ "metadata": {},
464
+ "outputs": [],
465
+ "source": [
466
+ "# 1. Define the prompt and extraction rules\n",
467
+ "prompt = textwrap.dedent(\"\"\"\\\n",
468
+ " フライトの情報です。データの規則性に従い、データを抽出してください。\n",
469
+ " 抽出は、データの順序を保ち、言い換えやパラフレーズを避けてください。\n",
470
+ " 各エンティティには、意味のある属性を追加してコンテキストを提供してください。\n",
471
+ " 出発日、到着日、フライト名などの情報を抽出してください。その際に日付から考えて、出発、到着の順になるように整合性を確認してください。\"\"\")"
472
+ ]
473
+ },
474
+ {
475
+ "cell_type": "code",
476
+ "execution_count": 37,
477
+ "id": "15aa1dd6",
478
+ "metadata": {},
479
+ "outputs": [],
480
+ "source": [
481
+ "# 2. Provide a high-quality example to guide the model\n",
482
+ "examples = [\n",
483
+ " lx.data.ExampleData(\n",
484
+ " text=\"[dat]20250801[nam]taro tanaka[age]20[dat]20250803[fr]cx0520\",\n",
485
+ " extractions=[\n",
486
+ " lx.data.Extraction(\n",
487
+ " extraction_class=\"departure_date\",\n",
488
+ " extraction_text=\"2025/08/01\",\n",
489
+ " ),\n",
490
+ " lx.data.Extraction(\n",
491
+ " extraction_class=\"name\",\n",
492
+ " extraction_text=\"taro tanaka\",\n",
493
+ " ),\n",
494
+ " lx.data.Extraction(\n",
495
+ " extraction_class=\"arrival_date\",\n",
496
+ " extraction_text=\"2025/08/03\",\n",
497
+ " ),\n",
498
+ " lx.data.Extraction(\n",
499
+ " extraction_class=\"flight_name\",\n",
500
+ " extraction_text=\"cx0520\",\n",
501
+ " ),\n",
502
+ " ]\n",
503
+ " )]"
504
+ ]
505
+ },
506
+ {
507
+ "cell_type": "code",
508
+ "execution_count": 38,
509
+ "id": "82f1b2bf",
510
+ "metadata": {},
511
+ "outputs": [
512
+ {
513
+ "name": "stderr",
514
+ "output_type": "stream",
515
+ "text": [
516
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m71\u001b[0m chars, processed=\u001b[92m71\u001b[0m chars: [00:09]"
517
+ ]
518
+ },
519
+ {
520
+ "name": "stdout",
521
+ "output_type": "stream",
522
+ "text": [
523
+ "\u001b[92m✓\u001b[0m Extraction processing complete\n",
524
+ "\u001b[92m✓\u001b[0m Extracted \u001b[1m4\u001b[0m entities (\u001b[1m4\u001b[0m unique types)\n",
525
+ " \u001b[96m•\u001b[0m Time: \u001b[1m9.18s\u001b[0m\n",
526
+ " \u001b[96m•\u001b[0m Speed: \u001b[1m8\u001b[0m chars/sec\n",
527
+ " \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n"
528
+ ]
529
+ },
530
+ {
531
+ "name": "stderr",
532
+ "output_type": "stream",
533
+ "text": [
534
+ "\n"
535
+ ]
536
+ }
537
+ ],
538
+ "source": [
539
+ "# The input text to be processed\n",
540
+ "input_text = \"[dat]20250804[nam]nakamura john[age]30[dat]20250805[br]cx0009[fr]ar0520\"\n",
541
+ "input_text = \"[dat]20250805[dat]20250804[nam]nakamura john[age]30[br]cx0009[fr]ar0520\"\n",
542
+ "\n",
543
+ "# Run the extraction\n",
544
+ "result = lx.extract(\n",
545
+ " text_or_documents=input_text,\n",
546
+ " prompt_description=prompt,\n",
547
+ " examples=examples,\n",
548
+ " language_model_type=inference.OllamaLanguageModel,\n",
549
+ " model_id=\"gemma2:latest\",\n",
550
+ " model_url=\"http://localhost:11434\"\n",
551
+ ")"
552
+ ]
553
+ },
554
+ {
555
+ "cell_type": "code",
556
+ "execution_count": 34,
557
+ "id": "b6d58afe",
558
+ "metadata": {},
559
+ "outputs": [
560
+ {
561
+ "name": "stderr",
562
+ "output_type": "stream",
563
+ "text": [
564
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Saving to \u001b[92mextraction_results.jsonl\u001b[0m: 1 docs [00:00, 500.10 docs/s]"
565
+ ]
566
+ },
567
+ {
568
+ "name": "stdout",
569
+ "output_type": "stream",
570
+ "text": [
571
+ "\u001b[92m✓\u001b[0m Saved \u001b[1m1\u001b[0m documents to \u001b[92mextraction_results.jsonl\u001b[0m\n"
572
+ ]
573
+ },
574
+ {
575
+ "name": "stderr",
576
+ "output_type": "stream",
577
+ "text": [
578
+ "\n",
579
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Loading \u001b[92mextraction_results.jsonl\u001b[0m: 100%|█████████▉| 997/998 [00:00<00:00, 994kB/s]"
580
+ ]
581
+ },
582
+ {
583
+ "name": "stdout",
584
+ "output_type": "stream",
585
+ "text": [
586
+ "\u001b[92m✓\u001b[0m Loaded \u001b[1m1\u001b[0m documents from \u001b[92mextraction_results.jsonl\u001b[0m\n"
587
+ ]
588
+ },
589
+ {
590
+ "name": "stderr",
591
+ "output_type": "stream",
592
+ "text": [
593
+ "\n"
594
+ ]
595
+ }
596
+ ],
597
+ "source": [
598
+ "# Save the results to a JSONL file\n",
599
+ "from pathlib import Path\n",
600
+ "lx.io.save_annotated_documents([result], output_name=\"extraction_results.jsonl\", output_dir=Path(\".\"))\n",
601
+ "\n",
602
+ "# Generate the visualization from the file\n",
603
+ "html_content = lx.visualize(\"extraction_results.jsonl\")\n"
604
+ ]
605
+ },
606
+ {
607
+ "cell_type": "code",
608
+ "execution_count": 27,
609
+ "id": "0d45589e",
610
+ "metadata": {},
611
+ "outputs": [
612
+ {
613
+ "data": {
614
+ "text/plain": [
615
+ "[Extraction(extraction_class='departure_date', extraction_text='2025/08/04', char_interval=None, alignment_status=None, extraction_index=1, group_index=0, description=None, attributes={}),\n",
616
+ " Extraction(extraction_class='name', extraction_text='nakamura john', char_interval=CharInterval(start_pos=18, end_pos=31), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={}),\n",
617
+ " Extraction(extraction_class='arrival_date', extraction_text='2025/08/05', char_interval=None, alignment_status=None, extraction_index=3, group_index=2, description=None, attributes={}),\n",
618
+ " Extraction(extraction_class='flight_name', extraction_text='cx0009', char_interval=CharInterval(start_pos=55, end_pos=61), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'type': 'departure'}),\n",
619
+ " Extraction(extraction_class='flight_name', extraction_text='ar0520', char_interval=CharInterval(start_pos=65, end_pos=71), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'type': 'arrival'})]"
620
+ ]
621
+ },
622
+ "execution_count": 27,
623
+ "metadata": {},
624
+ "output_type": "execute_result"
625
+ }
626
+ ],
627
+ "source": [
628
+ "result.extractions"
629
+ ]
630
+ },
631
+ {
632
+ "cell_type": "code",
633
+ "execution_count": 39,
634
+ "id": "34459cac",
635
+ "metadata": {},
636
+ "outputs": [
637
+ {
638
+ "data": {
639
+ "text/plain": [
640
+ "[Extraction(extraction_class='depature_date', extraction_text='2025/08/05', char_interval=None, alignment_status=None, extraction_index=1, group_index=0, description=None, attributes={}),\n",
641
+ " Extraction(extraction_class='arrival_date', extraction_text='2025/08/04', char_interval=None, alignment_status=None, extraction_index=2, group_index=1, description=None, attributes={}),\n",
642
+ " Extraction(extraction_class='name', extraction_text='nakamura john', char_interval=CharInterval(start_pos=31, end_pos=44), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={}),\n",
643
+ " Extraction(extraction_class='fright_name', extraction_text='cx0009', char_interval=CharInterval(start_pos=55, end_pos=61), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={})]"
644
+ ]
645
+ },
646
+ "execution_count": 39,
647
+ "metadata": {},
648
+ "output_type": "execute_result"
649
+ }
650
+ ],
651
+ "source": [
652
+ "result.extractions"
653
+ ]
654
+ },
655
+ {
656
+ "cell_type": "code",
657
+ "execution_count": null,
658
+ "id": "e2eba844",
659
+ "metadata": {},
660
+ "outputs": [],
661
+ "source": []
662
+ },
663
+ {
664
+ "cell_type": "markdown",
665
+ "id": "14fbd61f",
666
+ "metadata": {},
667
+ "source": [
668
+ "# 階層テスト"
669
+ ]
670
+ },
671
+ {
672
+ "cell_type": "code",
673
+ "execution_count": 43,
674
+ "id": "bd3dfda7",
675
+ "metadata": {},
676
+ "outputs": [
677
+ {
678
+ "name": "stderr",
679
+ "output_type": "stream",
680
+ "text": [
681
+ "\u001b[94m\u001b[1mLangExtract\u001b[0m: Processing, current=\u001b[92m40\u001b[0m chars, processed=\u001b[92m40\u001b[0m chars: [00:21]"
682
+ ]
683
+ },
684
+ {
685
+ "name": "stdout",
686
+ "output_type": "stream",
687
+ "text": [
688
+ "\u001b[92m✓\u001b[0m Extraction processing complete\n",
689
+ "\u001b[92m✓\u001b[0m Extracted \u001b[1m5\u001b[0m entities (\u001b[1m1\u001b[0m unique types)\n",
690
+ " \u001b[96m•\u001b[0m Time: \u001b[1m21.95s\u001b[0m\n",
691
+ " \u001b[96m•\u001b[0m Speed: \u001b[1m2\u001b[0m chars/sec\n",
692
+ " \u001b[96m•\u001b[0m Chunks: \u001b[1m1\u001b[0m\n",
693
+ "AnnotatedDocument(extractions=[Extraction(extraction_class='heading', extraction_text='第2章:分析', char_interval=CharInterval(start_pos=0, end_pos=6), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'level': 1, 'children': ['2.1 データ', '2.2 結果']}), Extraction(extraction_class='heading', extraction_text='2.1 データ', char_interval=CharInterval(start_pos=7, end_pos=14), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'level': 2, 'children': ['2.1.1 収集', '2.1.2 前処理']}), Extraction(extraction_class='heading', extraction_text='2.1.1 収集', char_interval=CharInterval(start_pos=15, end_pos=23), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={'level': 3, 'children': []}), Extraction(extraction_class='heading', extraction_text='2.1.2 前処理', char_interval=CharInterval(start_pos=24, end_pos=33), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'level': 3, 'children': []}), Extraction(extraction_class='heading', extraction_text='2.2 結果', char_interval=CharInterval(start_pos=34, end_pos=40), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'level': 2, 'children': []})], text='第2章:分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果')\n"
694
+ ]
695
+ },
696
+ {
697
+ "name": "stderr",
698
+ "output_type": "stream",
699
+ "text": [
700
+ "\n"
701
+ ]
702
+ }
703
+ ],
704
+ "source": [
705
+ "import langextract as lx\n",
706
+ "import textwrap\n",
707
+ "\n",
708
+ "prompt = textwrap.dedent(\"\"\"\\\n",
709
+ "以下の文章から「見出し階層」を抽出してください。\n",
710
+ "各階層は JSON にネストされた children リストで表現します。\n",
711
+ "出力の構造を見本にならって厳密に守ってください。\"\"\")\n",
712
+ "\n",
713
+ "# ツリー構造の具体例\n",
714
+ "examples = [\n",
715
+ " # 単一階層\n",
716
+ " lx.data.ExampleData(\n",
717
+ " text=\"第1章:概要\",\n",
718
+ " extractions=[\n",
719
+ " lx.data.Extraction(\n",
720
+ " extraction_class=\"heading\",\n",
721
+ " extraction_text=\"第1章:概要\",\n",
722
+ " attributes={\"level\": 1, \"children\": []}\n",
723
+ " )\n",
724
+ " ]\n",
725
+ " ),\n",
726
+ " # 2階層あり\n",
727
+ " lx.data.ExampleData(\n",
728
+ " text=\"第1章:概要\\n1.1 背景\\n1.2 目的\",\n",
729
+ " extractions=[\n",
730
+ " lx.data.Extraction(\"heading\",\n",
731
+ " \"第1章:概要\",\n",
732
+ " attributes={\"level\": 1, \"children\": [\"1.1 背景\", \"1.2 目的\"]}),\n",
733
+ " lx.data.Extraction(\"heading\", \"1.1 背景\", attributes={\"level\": 2, \"children\": []}),\n",
734
+ " lx.data.Extraction(\"heading\", \"1.2 目的\", attributes={\"level\": 2, \"children\": []}),\n",
735
+ " ]\n",
736
+ " ),\n",
737
+ " # 3階層と子無しケース\n",
738
+ " lx.data.ExampleData(\n",
739
+ " text=\"第2章:分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果\",\n",
740
+ " extractions=[\n",
741
+ " lx.data.Extraction(\"heading\", \"第2章:分析\", attributes={\"level\": 1, \"children\": [\"2.1 データ\", \"2.2 結果\"]}),\n",
742
+ " lx.data.Extraction(\"heading\", \"2.1 データ\", attributes={\"level\": 2, \"children\": [\"2.1.1 収集\", \"2.1.2 前処理\"]}),\n",
743
+ " lx.data.Extraction(\"heading\", \"2.1.1 収集\", attributes={\"level\": 3, \"children\": []}),\n",
744
+ " lx.data.Extraction(\"heading\", \"2.1.2 前処理\", attributes={\"level\": 3, \"children\": []}),\n",
745
+ " lx.data.Extraction(\"heading\", \"2.2 結果\", attributes={\"level\": 2, \"children\": []}),\n",
746
+ " ]\n",
747
+ " )\n",
748
+ "]\n",
749
+ "\n",
750
+ "result = lx.extract(\n",
751
+ " text_or_documents=\"第2章:分析\\n2.1 データ\\n2.1.1 収集\\n2.1.2 前処理\\n2.2 結果\",\n",
752
+ " prompt_description=prompt,\n",
753
+ " examples=examples,\n",
754
+ " language_model_type=inference.OllamaLanguageModel,\n",
755
+ " model_id=\"gemma2:latest\",\n",
756
+ " model_url=\"http://localhost:11434\"\n",
757
+ ")\n",
758
+ "\n",
759
+ "print(result)\n"
760
+ ]
761
+ },
762
+ {
763
+ "cell_type": "code",
764
+ "execution_count": 45,
765
+ "id": "d355c87a",
766
+ "metadata": {},
767
+ "outputs": [
768
+ {
769
+ "data": {
770
+ "text/plain": [
771
+ "[Extraction(extraction_class='heading', extraction_text='第2章:分析', char_interval=CharInterval(start_pos=0, end_pos=6), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'level': 1, 'children': ['2.1 データ', '2.2 結果']}),\n",
772
+ " Extraction(extraction_class='heading', extraction_text='2.1 データ', char_interval=CharInterval(start_pos=7, end_pos=14), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=2, group_index=1, description=None, attributes={'level': 2, 'children': ['2.1.1 収集', '2.1.2 前処理']}),\n",
773
+ " Extraction(extraction_class='heading', extraction_text='2.1.1 収集', char_interval=CharInterval(start_pos=15, end_pos=23), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=3, group_index=2, description=None, attributes={'level': 3, 'children': []}),\n",
774
+ " Extraction(extraction_class='heading', extraction_text='2.1.2 前処理', char_interval=CharInterval(start_pos=24, end_pos=33), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=4, group_index=3, description=None, attributes={'level': 3, 'children': []}),\n",
775
+ " Extraction(extraction_class='heading', extraction_text='2.2 結果', char_interval=CharInterval(start_pos=34, end_pos=40), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=5, group_index=4, description=None, attributes={'level': 2, 'children': []})]"
776
+ ]
777
+ },
778
+ "execution_count": 45,
779
+ "metadata": {},
780
+ "output_type": "execute_result"
781
+ }
782
+ ],
783
+ "source": [
784
+ "result.extractions"
785
+ ]
786
+ }
787
+ ],
788
+ "metadata": {
789
+ "kernelspec": {
790
+ "display_name": "Python 3",
791
+ "language": "python",
792
+ "name": "python3"
793
+ },
794
+ "language_info": {
795
+ "codemirror_mode": {
796
+ "name": "ipython",
797
+ "version": 3
798
+ },
799
+ "file_extension": ".py",
800
+ "mimetype": "text/x-python",
801
+ "name": "python",
802
+ "nbconvert_exporter": "python",
803
+ "pygments_lexer": "ipython3",
804
+ "version": "3.12.9"
805
+ }
806
+ },
807
+ "nbformat": 4,
808
+ "nbformat_minor": 5
809
+ }
20250803_langextract/visualization.html ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <style>
2
+ .lx-highlight { position: relative; border-radius:3px; padding:1px 2px;}
3
+ .lx-highlight .lx-tooltip {
4
+ visibility: hidden;
5
+ opacity: 0;
6
+ transition: opacity 0.2s ease-in-out;
7
+ background: #333;
8
+ color: #fff;
9
+ text-align: left;
10
+ border-radius: 4px;
11
+ padding: 6px 8px;
12
+ position: absolute;
13
+ z-index: 1000;
14
+ bottom: 125%;
15
+ left: 50%;
16
+ transform: translateX(-50%);
17
+ font-size: 12px;
18
+ max-width: 240px;
19
+ white-space: normal;
20
+ box-shadow: 0 2px 6px rgba(0,0,0,0.3);
21
+ }
22
+ .lx-highlight:hover .lx-tooltip { visibility: visible; opacity:1; }
23
+ .lx-animated-wrapper { max-width: 100%; font-family: Arial, sans-serif; }
24
+ .lx-controls {
25
+ background: #fafafa; border: 1px solid #90caf9; border-radius: 8px;
26
+ padding: 12px; margin-bottom: 16px;
27
+ }
28
+ .lx-button-row {
29
+ display: flex; justify-content: center; gap: 8px; margin-bottom: 12px;
30
+ }
31
+ .lx-control-btn {
32
+ background: #4285f4; color: white; border: none; border-radius: 4px;
33
+ padding: 8px 16px; cursor: pointer; font-size: 13px; font-weight: 500;
34
+ transition: background-color 0.2s;
35
+ }
36
+ .lx-control-btn:hover { background: #3367d6; }
37
+ .lx-progress-container {
38
+ margin-bottom: 8px;
39
+ }
40
+ .lx-progress-slider {
41
+ width: 100%; margin: 0; appearance: none; height: 6px;
42
+ background: #ddd; border-radius: 3px; outline: none;
43
+ }
44
+ .lx-progress-slider::-webkit-slider-thumb {
45
+ appearance: none; width: 18px; height: 18px; background: #4285f4;
46
+ border-radius: 50%; cursor: pointer;
47
+ }
48
+ .lx-progress-slider::-moz-range-thumb {
49
+ width: 18px; height: 18px; background: #4285f4; border-radius: 50%;
50
+ cursor: pointer; border: none;
51
+ }
52
+ .lx-status-text {
53
+ text-align: center; font-size: 12px; color: #666; margin-top: 4px;
54
+ }
55
+ .lx-text-window {
56
+ font-family: monospace; white-space: pre-wrap; border: 1px solid #90caf9;
57
+ padding: 12px; max-height: 260px; overflow-y: auto; margin-bottom: 12px;
58
+ line-height: 1.6;
59
+ }
60
+ .lx-attributes-panel {
61
+ background: #fafafa; border: 1px solid #90caf9; border-radius: 6px;
62
+ padding: 8px 10px; margin-top: 8px; font-size: 13px;
63
+ }
64
+ .lx-current-highlight {
65
+ text-decoration: underline;
66
+ text-decoration-color: #ff4444;
67
+ text-decoration-thickness: 3px;
68
+ font-weight: bold;
69
+ animation: lx-pulse 1s ease-in-out;
70
+ }
71
+ @keyframes lx-pulse {
72
+ 0% { text-decoration-color: #ff4444; }
73
+ 50% { text-decoration-color: #ff0000; }
74
+ 100% { text-decoration-color: #ff4444; }
75
+ }
76
+ .lx-legend {
77
+ font-size: 12px; margin-bottom: 8px;
78
+ padding-bottom: 8px; border-bottom: 1px solid #e0e0e0;
79
+ }
80
+ .lx-label {
81
+ display: inline-block;
82
+ padding: 2px 4px;
83
+ border-radius: 3px;
84
+ margin-right: 4px;
85
+ color: #000;
86
+ }
87
+ .lx-attr-key {
88
+ font-weight: 600;
89
+ color: #1565c0;
90
+ letter-spacing: 0.3px;
91
+ }
92
+ .lx-attr-value {
93
+ font-weight: 400;
94
+ opacity: 0.85;
95
+ letter-spacing: 0.2px;
96
+ }
97
+
98
+ /* Add optimizations with larger fonts and better readability for GIFs */
99
+ .lx-gif-optimized .lx-text-window { font-size: 16px; line-height: 1.8; }
100
+ .lx-gif-optimized .lx-attributes-panel { font-size: 15px; }
101
+ .lx-gif-optimized .lx-current-highlight { text-decoration-thickness: 4px; }
102
+ </style>
103
+ <div class="lx-animated-wrapper lx-gif-optimized">
104
+ <div class="lx-attributes-panel">
105
+ <div class="lx-legend">Highlights Legend: <span class="lx-label" style="background-color:#D2E3FC;">character</span> <span class="lx-label" style="background-color:#C8E6C9;">emotion</span> <span class="lx-label" style="background-color:#FEF0C3;">relationship</span></div>
106
+ <div id="attributesContainer"></div>
107
+ </div>
108
+ <div class="lx-text-window" id="textWindow">
109
+ <span class="lx-highlight lx-current-highlight" data-idx="0" style="background-color:#FEF0C3;"><span class="lx-highlight" data-idx="1" style="background-color:#D2E3FC;">Lady Juliet</span> gazed longingly at the stars, her heart <span class="lx-highlight" data-idx="2" style="background-color:#C8E6C9;">aching</span> for Romeo</span>
110
+ </div>
111
+ <div class="lx-controls">
112
+ <div class="lx-button-row">
113
+ <button class="lx-control-btn" onclick="playPause()">▶️ Play</button>
114
+ <button class="lx-control-btn" onclick="prevExtraction()">⏮ Previous</button>
115
+ <button class="lx-control-btn" onclick="nextExtraction()">⏭ Next</button>
116
+ </div>
117
+ <div class="lx-progress-container">
118
+ <input type="range" id="progressSlider" class="lx-progress-slider"
119
+ min="0" max="2" value="0"
120
+ onchange="jumpToExtraction(this.value)">
121
+ </div>
122
+ <div class="lx-status-text">
123
+ Entity <span id="entityInfo">1/3</span> |
124
+ Pos <span id="posInfo">[0-11]</span>
125
+ </div>
126
+ </div>
127
+ </div>
128
+
129
+ <script>
130
+ (function() {
131
// Extraction records shown by this widget, in display order. Each record
// carries its highlight colour, start/end character offsets, the surrounding
// text and a pre-rendered HTML fragment describing its class and attributes.
+ const extractions = [{"index": 0, "class": "relationship", "text": "Lady Juliet and Romeo", "color": "#FEF0C3", "startPos": 0, "endPos": 68, "beforeText": "", "extractionText": "Lady Juliet gazed longingly at the stars, her heart aching for Romeo", "afterText": "", "attributesHtml": "<div><strong>class:</strong> relationship</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">type</span>: <span class=\"lx-attr-value\">romantic love</span>}</div>"}, {"index": 1, "class": "character", "text": "Lady Juliet", "color": "#D2E3FC", "startPos": 0, "endPos": 11, "beforeText": "", "extractionText": "Lady Juliet", "afterText": " gazed longingly at the stars, her heart aching for Romeo", "attributesHtml": "<div><strong>class:</strong> character</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">emotional_state</span>: <span class=\"lx-attr-value\">longing</span>}</div>"}, {"index": 2, "class": "emotion", "text": "aching", "color": "#C8E6C9", "startPos": 52, "endPos": 58, "beforeText": "Lady Juliet gazed longingly at the stars, her heart ", "extractionText": "aching", "afterText": " for Romeo", "attributesHtml": "<div><strong>class:</strong> emotion</div><div><strong>attributes:</strong> {<span class=\"lx-attr-key\">feeling</span>: <span class=\"lx-attr-value\">sorrowful desire</span>}</div>"}];
132
// Index into `extractions` of the record currently shown.
+ let currentIndex = 0;
133
// True while the auto-advance timer started by playPause() is running.
+ let isPlaying = false;
134
// Handle returned by setInterval(); null until playback is started for the first time.
+ let animationInterval = null;
135
// Seconds between automatic steps (multiplied by 1000 when the timer is created).
+ let animationSpeed = 1.0;
136
+
137
/**
 * Re-render the widget so that it reflects `currentIndex` and `isPlaying`.
 *
 * Refreshes the attributes panel, the "n/total" counter, the character
 * range read-out, the slider position and the play/pause caption, then moves
 * the `lx-current-highlight` marker onto the span of the current extraction
 * and scrolls that span into view. Does nothing when `currentIndex` does not
 * address an existing extraction.
 */
function updateDisplay() {
  const active = extractions[currentIndex];
  if (!active) {
    return;
  }
  const { attributesHtml, startPos, endPos } = active;

  // Status read-outs. attributesHtml is markup taken verbatim from the
  // extraction record, so it is injected as HTML rather than as text.
  document.getElementById('attributesContainer').innerHTML = attributesHtml;
  document.getElementById('entityInfo').textContent = `${currentIndex + 1}/${extractions.length}`;
  document.getElementById('posInfo').textContent = `[${startPos}-${endPos}]`;
  document.getElementById('progressSlider').value = currentIndex;

  // querySelector returns the first control button in document order; in the
  // accompanying markup that is the play/pause button.
  const toggleButton = document.querySelector('.lx-control-btn');
  if (toggleButton) {
    toggleButton.textContent = isPlaying ? '⏸ Pause' : '▶️ Play';
  }

  // Move the "current" marker from the previously highlighted span to the new one.
  const marker = 'lx-current-highlight';
  const staleSpan = document.querySelector(`.lx-text-window .${marker}`);
  if (staleSpan) {
    staleSpan.classList.remove(marker);
  }

  const activeSpan = document.querySelector(`.lx-text-window span[data-idx="${currentIndex}"]`);
  if (!activeSpan) {
    return;
  }
  activeSpan.classList.add(marker);
  activeSpan.scrollIntoView({ block: 'center', behavior: 'smooth' });
}
157
+
158
/**
 * Advance to the following extraction, wrapping from the last one back to
 * the first, and refresh the display.
 */
function nextExtraction() {
  const total = extractions.length;
  const advanced = currentIndex + 1;
  currentIndex = advanced % total;
  updateDisplay();
}
162
+
163
/**
 * Step back to the preceding extraction, wrapping from the first one to the
 * last, and refresh the display.
 */
function prevExtraction() {
  const total = extractions.length;
  // Adding `total` before the modulo keeps the left operand non-negative when
  // stepping back from index 0.
  const shifted = currentIndex - 1 + total;
  currentIndex = shifted % total;
  updateDisplay();
}
167
+
168
/**
 * Jump straight to a given extraction and refresh the display.
 *
 * The range slider passes its value as a string, so the argument is parsed
 * as a base-10 integer. Input that does not parse to a number is ignored,
 * and numeric input outside the list is clamped to the nearest valid index.
 *
 * @param {number|string} index Zero-based position of the extraction to show.
 */
function jumpToExtraction(index) {
  const requested = parseInt(index, 10);

  // Storing NaN in currentIndex would make every later prev/next computation
  // NaN as well and leave the widget unresponsive, so reject it up front.
  if (Number.isNaN(requested)) {
    return;
  }

  // Clamp into [0, length - 1]; with an empty list this settles on 0 and
  // updateDisplay() simply finds nothing to render.
  const lastIndex = Math.max(extractions.length - 1, 0);
  currentIndex = Math.min(Math.max(requested, 0), lastIndex);
  updateDisplay();
}
172
+
173
/**
 * Toggle automatic playback.
 *
 * When playback is running the repeating timer is cancelled; otherwise a new
 * timer is started that calls nextExtraction() every `animationSpeed`
 * seconds. The display is refreshed afterwards so the button caption matches
 * the new state.
 */
function playPause() {
  const wasPlaying = isPlaying;

  if (wasPlaying) {
    clearInterval(animationInterval);
  } else {
    // animationSpeed is expressed in seconds; setInterval expects milliseconds.
    const delayMs = animationSpeed * 1000;
    animationInterval = setInterval(nextExtraction, delayMs);
  }

  isPlaying = !wasPlaying;
  updateDisplay();
}
183
+
184
// Publish the handlers on `window`: the inline onclick/onchange attributes in
// the widget markup call them by their global names.
Object.assign(window, {
  playPause,
  nextExtraction,
  prevExtraction,
  jumpToExtraction,
});

// Initial render for the first extraction.
updateDisplay();
190
+ })();
191
+ </script>