Spaces:

tachiwin
/

multilingual_ocr

Running

App Files Community

Luis J Camargo committed on Dec 22, 2025

Commit

d125128

1 Parent(s): cb29984

demo images and output text

Browse files

Files changed (9) hide show

app.py +27 -32
cco.jpg +0 -0
cnt.jpg +0 -0
cuc.jpg +0 -0
maj.jpg +0 -0
mir.jpg +0 -0
ote.jpg +0 -0
otm.jpg +0 -0
tku.jpg +0 -0

app.py CHANGED Viewed

@@ -93,35 +93,25 @@ def inference(img):
         if not result or len(result) == 0:
             return "No text detected in the image."
-        # Debug: Check result structure
-        print(f"Result type: {type(result)}")
-        print(f"Result content: {result}")
-        # Extract text and format as markdown table
-        output_lines = ["# Extracted Text\n"]
-        output_lines.append("| Text | Confidence |")
-        output_lines.append("|------|-----------|")
-        # Handle different result formats from PaddleOCRVL
-        if isinstance(result, list):
-            for item in result:
-                if isinstance(item, dict):
-                    # If result is a dict with 'text' and 'confidence'
-                    text = item.get('text', str(item))
-                    confidence = item.get('confidence', 1.0)
-                    output_lines.append(f"| {text} | {confidence:.2%} |")
-                elif isinstance(item, (list, tuple)) and len(item) >= 2:
-                    # If result is like [(bbox, (text, confidence)), ...]
-                    text = item[1][0] if isinstance(item[1], (list, tuple)) else str(item[1])
-                    confidence = item[1][1] if isinstance(item[1], (list, tuple)) and len(item[1]) > 1 else 1.0
-                    output_lines.append(f"| {text} | {confidence:.2%} |")
-                else:
-                    # Fallback: just show the item
-                    output_lines.append(f"| {str(item)} | N/A |")
-        else:
-            output_lines.append(f"| {str(result)} | N/A |")
-        return "\n".join(output_lines)
     except Exception as e:
         import traceback
@@ -145,18 +135,23 @@ detect and recognize the text.
 '''
 examples = [
-    ['example_nahuatl.jpg'],
-    ['example_maya.jpg'],
-    ['example_zapoteco.jpg'],
 ]
 example_labels = """
 ### Example Images:
 | Image | Language | Description |
 |-------|----------|-------------|
-| example_nahuatl.jpg | Náhuatl | Classical Nahuatl text with traditional glyphs |
-| example_maya.jpg | Maya (Yucatec) | Contemporary Maya writing with diacritics |
-| example_zapoteco.jpg | Zapoteco (Istmo) | Zapotec text from Oaxaca region |
 """
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"

         if not result or len(result) == 0:
             return "No text detected in the image."
+        # Extract only the text content from PaddleOCRVL result
+        extracted_texts = []
+        for item in result:
+            if isinstance(item, dict):
+                # Look for 'layout_parsing_res' which contains the actual text blocks
+                if 'layout_parsing_res' in item:
+                    for block in item['layout_parsing_res']:
+                        if 'content' in block:
+                            extracted_texts.append(block['content'])
+                # Fallback: look for 'content' directly
+                elif 'content' in item:
+                    extracted_texts.append(item['content'])
+        if not extracted_texts:
+            return "No text could be extracted from the image."
+        # Join all text blocks with double newlines
+        return "\n\n".join(extracted_texts)
     except Exception as e:
         import traceback
 '''
 examples = [
+    ['cco.jpg'],
+    ['cnt.jpg'],
+    ['cuc.jpg'],
+    ['maj.jpg'],
+    ['mir.jpg'],
+    ['ote.jpg'],
+    ['otm.jpg'],
+    ['tku.jpg'],
 ]
 example_labels = """
 ### Example Images:
 | Image | Language | Description |
 |-------|----------|-------------|
+| cco.jpg | Comaltepec Chinantec | Sample text in Comaltepec Chinantec |
+| cnt.jpg | Tepetotutla Chinantec | Sample text in Tepetotutla Chinantec |
+| cuc.jpg | Usila Chinantec | Sample text in Usila Chinantec |
 """
 css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"

cco.jpg ADDED Viewed

cnt.jpg ADDED Viewed

cuc.jpg ADDED Viewed

maj.jpg ADDED Viewed

mir.jpg ADDED Viewed

ote.jpg ADDED Viewed

otm.jpg ADDED Viewed

tku.jpg ADDED Viewed