MultiVLM-OCR

Running on Zero

App Files Files Community

Geraldine committed on 22 days ago

Commit

ef0156e

verified ·

1 Parent(s): 006f21c

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -26

app.py CHANGED Viewed

@@ -177,27 +177,21 @@ MODEL_MAP = {
 MODEL_CHOICES = list(MODEL_MAP.keys())
 PROMPTS = {
-    "GENERAL": {
-        "name": "General Text Extraction",
-        "description": "Extract all text from this cover page",
-        "prompt": "Extract all text from this image. Preserve the layout and structure. Output plain text.",
-        "icon": "📄"
-    },
-    "MARKDOWN": {
-        "name": "Simple Markdown Conversion",
-        "description": "Convert document to Markdown format",
-        "prompt": "Convert this document to Markdown. Preserve headings, lists, and formatting.",
         "icon": "📝"
     },
-    "MARKDOWN_OCR": {
-        "name": "Markdown OCR",
         "description": "Perform OCR and convert to Markdown",
         "prompt": "Perform OCR including inside images and logos and convert to Markdown.",
         "icon": "🔍"
     },
-    "TITLE_JSON": {
-        "name": "Title JSON",
-        "description": "Extract title as JSON",
         "prompt": """Extract the document title from this cover page.
 Output ONLY valid JSON:
 {
@@ -205,9 +199,9 @@ Output ONLY valid JSON:
 }""",
         "icon": "🏷️"
     },
-    "LOCATED_TITLE_JSON": {
-        "name": "Located Title JSON",
-        "description": "Extract located title as JSON",
         "prompt": """Extract the document title from the middle central block of this cover page.
 Output ONLY valid JSON:
 {
@@ -215,9 +209,9 @@ Output ONLY valid JSON:
 }""",
         "icon": "📍"
     },
-    "GROUNDED_TITLE_JSON": {
-        "name": "Grounded Title JSON",
-        "description": "Extract grounded title as JSON",
         "prompt": """Extract the document title usually located around (x=0.5015, y=0.442) from this cover page.
 Output ONLY valid JSON:
 {
@@ -283,11 +277,11 @@ Return ONLY the JSON, no explanation.""",
 }
 image_examples = [
-    {"query": PROMPTS["GENERAL"]["prompt"], "image": "examples/dumas_01382452.png", "model": "Nanonets-OCR2-3B"},
-    {"query": PROMPTS["MARKDOWN_OCR"]["prompt"], "image": "examples/dumas_01646440.png", "model": "olmOCR-7B-0725"},
-    {"query": PROMPTS["TITLE_JSON"]["prompt"], "image": "examples/ephesvt_theses_doc13.jpg", "model": "Qwen3-VL-4B-Instruct"},
-    {"query": PROMPTS["LOCATED_TITLE_JSON"]["prompt"], "image": "examples/memoires_cridaf_doc07.jpg", "model": "Qwen2-VL-OCR-2B"},
-    {"query": PROMPTS["GROUNDED_TITLE_JSON"]["prompt"], "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
     {"query": "", "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
 ]

 MODEL_CHOICES = list(MODEL_MAP.keys())
 PROMPTS = {
+    "OCR_GENERAL": {
+        "name": "Simple Text Extraction",
+        "description": "Extract the text including inside images and logos",
+        "prompt": "Extract the text including inside images and logos",
         "icon": "📝"
     },
+    "OCR_MARKDOWN": {
+        "name": "OCR -> Markdown",
         "description": "Perform OCR and convert to Markdown",
         "prompt": "Perform OCR including inside images and logos and convert to Markdown.",
         "icon": "🔍"
     },
+    "STRUCTURED_EXTRACTION": {
+        "name": "Json Metadata Extraction",
+        "description": "Extract metadata",
         "prompt": """Extract the document title from this cover page.
 Output ONLY valid JSON:
 {
 }""",
         "icon": "🏷️"
     },
+    "STRUCTURED_LOCATED_EXTRACTION": {
+        "name": "Located Json Metadata Extraction",
+        "description": "Extract located metadata",
         "prompt": """Extract the document title from the middle central block of this cover page.
 Output ONLY valid JSON:
 {
 }""",
         "icon": "📍"
     },
+    "STRUCTURED_GROUNDED_EXTRACTION": {
+        "name": "Grounded Json Metadata Extraction",
+        "description": "Extract grounded metadata",
         "prompt": """Extract the document title usually located around (x=0.5015, y=0.442) from this cover page.
 Output ONLY valid JSON:
 {
 }
 image_examples = [
+    {"query": PROMPTS["OCR_GENERAL"]["prompt"], "image": "examples/dumas_01382452.png", "model": "Nanonets-OCR2-3B"},
+    {"query": PROMPTS["OCR_MARKDOWN"]["prompt"], "image": "examples/dumas_01646440.png", "model": "olmOCR-7B-0725"},
+    {"query": PROMPTS["STRUCTURED_EXTRACTION"]["prompt"], "image": "examples/ephesvt_theses_doc13.jpg", "model": "Qwen3-VL-4B-Instruct"},
+    {"query": PROMPTS["STRUCTURED_LOCATED_EXTRACTION"]["prompt"], "image": "examples/memoires_cridaf_doc07.jpg", "model": "Qwen2-VL-OCR-2B"},
+    {"query": PROMPTS["STRUCTURED_GROUNDED_EXTRACTION"]["prompt"], "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
     {"query": "", "image": "examples/thesefr_2015PA010690.png", "model": "LightOnOCR-2-1B"},
 ]