Luis J Camargo committed on
Commit
d125128
·
1 Parent(s): cb29984

demo images and output text

Browse files
Files changed (9) hide show
  1. app.py +27 -32
  2. cco.jpg +0 -0
  3. cnt.jpg +0 -0
  4. cuc.jpg +0 -0
  5. maj.jpg +0 -0
  6. mir.jpg +0 -0
  7. ote.jpg +0 -0
  8. otm.jpg +0 -0
  9. tku.jpg +0 -0
app.py CHANGED
@@ -93,35 +93,25 @@ def inference(img):
93
  if not result or len(result) == 0:
94
  return "No text detected in the image."
95
 
96
- # Debug: Check result structure
97
- print(f"Result type: {type(result)}")
98
- print(f"Result content: {result}")
99
 
100
- # Extract text and format as markdown table
101
- output_lines = ["# Extracted Text\n"]
102
- output_lines.append("| Text | Confidence |")
103
- output_lines.append("|------|-----------|")
 
 
 
 
 
 
104
 
105
- # Handle different result formats from PaddleOCRVL
106
- if isinstance(result, list):
107
- for item in result:
108
- if isinstance(item, dict):
109
- # If result is a dict with 'text' and 'confidence'
110
- text = item.get('text', str(item))
111
- confidence = item.get('confidence', 1.0)
112
- output_lines.append(f"| {text} | {confidence:.2%} |")
113
- elif isinstance(item, (list, tuple)) and len(item) >= 2:
114
- # If result is like [(bbox, (text, confidence)), ...]
115
- text = item[1][0] if isinstance(item[1], (list, tuple)) else str(item[1])
116
- confidence = item[1][1] if isinstance(item[1], (list, tuple)) and len(item[1]) > 1 else 1.0
117
- output_lines.append(f"| {text} | {confidence:.2%} |")
118
- else:
119
- # Fallback: just show the item
120
- output_lines.append(f"| {str(item)} | N/A |")
121
- else:
122
- output_lines.append(f"| {str(result)} | N/A |")
123
 
124
- return "\n".join(output_lines)
 
125
 
126
  except Exception as e:
127
  import traceback
@@ -145,18 +135,23 @@ detect and recognize the text.
145
  '''
146
 
147
  examples = [
148
- ['example_nahuatl.jpg'],
149
- ['example_maya.jpg'],
150
- ['example_zapoteco.jpg'],
 
 
 
 
 
151
  ]
152
 
153
  example_labels = """
154
  ### Example Images:
155
  | Image | Language | Description |
156
  |-------|----------|-------------|
157
- | example_nahuatl.jpg | Náhuatl | Classical Nahuatl text with traditional glyphs |
158
- | example_maya.jpg | Maya (Yucatec) | Contemporary Maya writing with diacritics |
159
- | example_zapoteco.jpg | Zapoteco (Istmo) | Zapotec text from Oaxaca region |
160
  """
161
 
162
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
 
93
  if not result or len(result) == 0:
94
  return "No text detected in the image."
95
 
96
+ # Extract only the text content from PaddleOCRVL result
97
+ extracted_texts = []
 
98
 
99
+ for item in result:
100
+ if isinstance(item, dict):
101
+ # Look for 'layout_parsing_res' which contains the actual text blocks
102
+ if 'layout_parsing_res' in item:
103
+ for block in item['layout_parsing_res']:
104
+ if 'content' in block:
105
+ extracted_texts.append(block['content'])
106
+ # Fallback: look for 'content' directly
107
+ elif 'content' in item:
108
+ extracted_texts.append(item['content'])
109
 
110
+ if not extracted_texts:
111
+ return "No text could be extracted from the image."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
+ # Join all text blocks with double newlines
114
+ return "\n\n".join(extracted_texts)
115
 
116
  except Exception as e:
117
  import traceback
 
135
  '''
136
 
137
  examples = [
138
+ ['cco.jpg'],
139
+ ['cnt.jpg'],
140
+ ['cuc.jpg'],
141
+ ['maj.jpg'],
142
+ ['mir.jpg'],
143
+ ['ote.jpg'],
144
+ ['otm.jpg'],
145
+ ['tku.jpg'],
146
  ]
147
 
148
  example_labels = """
149
  ### Example Images:
150
  | Image | Language | Description |
151
  |-------|----------|-------------|
152
+ | cco.jpg | Comaltepec Chinantec | Classical Nahuatl text with traditional glyphs |
153
+ | cnt.jpg | Tepetotutla Chinantec | Contemporary Maya writing with diacritics |
154
+ | cuc.jpg | Usila Chinantec | Zapotec text from Oaxaca region |
155
  """
156
 
157
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
cco.jpg ADDED
cnt.jpg ADDED
cuc.jpg ADDED
maj.jpg ADDED
mir.jpg ADDED
ote.jpg ADDED
otm.jpg ADDED
tku.jpg ADDED