yahtzee committed on
Commit
373717f
·
verified ·
1 Parent(s): c6a49f5

Remove "PDF" from the prompt and drop the rule that omitted the text field for the "Picture" category

Browse files
Files changed (1) hide show
  1. app.py +1 -2
app.py CHANGED
@@ -22,14 +22,13 @@ MAX_PIXELS = 11289600
22
  IMAGE_FACTOR = 28
23
 
24
  # Prompts
25
- prompt = """Please output the layout information from the PDF image, including each layout element's bbox, its category, and the corresponding text content within the bbox.
26
 
27
  1. Bbox format: [x1, y1, x2, y2]
28
 
29
  2. Layout Categories: The possible categories are ['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title'].
30
 
31
  3. Text Extraction & Formatting Rules:
32
- - Picture: For the 'Picture' category, the text field should be omitted.
33
  - Formula: Format its text as LaTeX.
34
  - Table: Format its text as HTML.
35
  - All Others (Text, Title, etc.): Format their text as Markdown.
 
22
  IMAGE_FACTOR = 28
23
 
24
  # Prompts
25
+ prompt = """Please output the layout information from the image, including each layout element's bbox, its category, and the corresponding text content within the bbox.
26
 
27
  1. Bbox format: [x1, y1, x2, y2]
28
 
29
  2. Layout Categories: The possible categories are ['Caption', 'Footnote', 'Formula', 'List-item', 'Page-footer', 'Page-header', 'Picture', 'Section-header', 'Table', 'Text', 'Title'].
30
 
31
  3. Text Extraction & Formatting Rules:
 
32
  - Formula: Format its text as LaTeX.
33
  - Table: Format its text as HTML.
34
  - All Others (Text, Title, etc.): Format their text as Markdown.