Update README.md
Browse files
README.md
CHANGED
|
@@ -289,18 +289,18 @@ pil_image = Image.open("demo_data/demo.png").convert("RGB")
|
|
| 289 |
min_pixels = 2048 # 32 * 64
|
| 290 |
max_pixels = 16777216 # 4096 * 4096
|
| 291 |
prompt = """
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
4.
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
"""
|
| 305 |
|
| 306 |
messages = [
|
|
|
|
| 289 |
min_pixels = 2048 # 32 * 64
|
| 290 |
max_pixels = 16777216 # 4096 * 4096
|
| 291 |
prompt = """
|
| 292 |
+
- Extract layout information from the provided PDF image.
|
| 293 |
+
- For each layout element, output its bbox, category, and the text content within the bbox.
|
| 294 |
+
- Bbox format: [x1, y1, x2, y2].
|
| 295 |
+
- Allowed layout categories: ['header', 'title', 'text', 'figure', 'table', 'formula', 'figure_caption', 'table_caption', 'formula_caption', 'figure_footnote', 'table_footnote', 'page_footnote', 'footer'].
|
| 296 |
+
- Text extraction and formatting:
|
| 297 |
+
1) For 'figure', the text field must be an empty string.
|
| 298 |
+
2) For 'formula', format text as LaTeX.
|
| 299 |
+
3) For 'table', format text as HTML.
|
| 300 |
+
4) For all other categories (e.g., text, title), format text as Markdown.
|
| 301 |
+
- The output text must be exactly the original text from the image, with no translation or rewriting.
|
| 302 |
+
- Sort all layout elements in human reading order.
|
| 303 |
+
- Final output must be a single JSON object.
|
| 304 |
"""
|
| 305 |
|
| 306 |
messages = [
|