Luis J Camargo committed on
Commit
281d052
·
1 Parent(s): 5dab21c
Files changed (1) hide show
  1. app.py +30 -22
app.py CHANGED
@@ -89,31 +89,39 @@ def inference(img):
89
 
90
  try:
91
  result = model_manager.infer(img)
92
- print(result)
93
-
94
  if not result or len(result) == 0:
95
  return "No text detected in the image."
96
 
97
- # Debug: Print full result as JSON to see structure
98
- import json
 
99
 
100
- def serialize_for_json(obj):
101
- """Convert non-serializable objects to strings"""
102
- if isinstance(obj, dict):
103
- return {k: serialize_for_json(v) for k, v in obj.items()}
104
- elif isinstance(obj, list):
105
- return [serialize_for_json(item) for item in obj]
106
- elif hasattr(obj, '__dict__'):
107
- return serialize_for_json(obj.__dict__)
108
- elif isinstance(obj, (str, int, float, bool, type(None))):
109
- return obj
110
- else:
111
- return str(type(obj))
112
 
113
- serialized_result = serialize_for_json(result)
114
- json_output = json.dumps(serialized_result, indent=2, ensure_ascii=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
- return f"```json\n{json_output}\n```"
117
 
118
  """ if not result or len(result) == 0:
119
  return "No text detected in the image."
@@ -162,7 +170,6 @@ detect and recognize the text.
162
  examples = [
163
  ['cco.jpg'],
164
  ['cnt.jpg'],
165
- ['cuc.jpg'],
166
  ['maj.jpg'],
167
  ['mir.jpg'],
168
  ['ote.jpg'],
@@ -174,11 +181,12 @@ example_labels = """
174
  ### Example Images:
175
  | Image | Language | Text |
176
  |-------|----------|-------------|
177
- | cco.jpg | Comaltepec Chinantec | Classical Nahuatl text with traditional glyphs |
178
- | cnt.jpg | Tepetotutla Chiantec | Contemporary Maya writing with diacritics |
179
  | cuc.jpg | Usila Chinantec | Zapotec text from Oaxaca region |
180
  | maj.jpg | Mazatec, Jalapa de Díaz | Kui xi já maña̱ xi ngakjá ku̱a̱kúya ni xi ts'e̱ Nti̱a̱ná. Kj'a̱í ni xi ku̱a̱kúyanu̱u, kui xi ts'i̱ínkatsúnnu̱u. Najmi ts'i̱ínkie yjoho̱ nga Nda̱ Nti̱a̱ná xi ts'asjejihi̱n. B'a̱ ts'ín ki̱tsa̱ ts'i̱ín nibánehe̱ ra̱ yjoho̱ nga n'e̱kje. Nkjin xi i̱ncha ts'i̱ín ni xi i̱ncha ts'ín jóo̱, ni xi tu̱ subahá maná. |
181
  | mir.jpg | Isthmus Mixe | Cab jaduhṉ yhahixøꞌøy coo jaꞌa naam̱dägøꞌøbä tiúnät wiindsǿṉ maa jaꞌa Diostøjcän, coo jaduhṉ ñäꞌä niguiumayǿøjät. |
 
182
  """
183
 
184
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"
 
89
 
90
  try:
91
  result = model_manager.infer(img)
92
+
 
93
  if not result or len(result) == 0:
94
  return "No text detected in the image."
95
 
96
+ # Debug: Check result structure
97
+ print(f"Result type: {type(result)}")
98
+ print(f"Result content: {result}")
99
 
100
+ # Extract text and format as markdown table
101
+ output_lines = ["# Extracted Text\n"]
102
+ output_lines.append("| Text | Confidence |")
103
+ output_lines.append("|------|-----------|")
 
 
 
 
 
 
 
 
104
 
105
+ # Handle different result formats from PaddleOCRVL
106
+ if isinstance(result, list):
107
+ for item in result:
108
+ if isinstance(item, dict):
109
+ # If result is a dict with 'text' and 'confidence'
110
+ text = item.get('text', str(item))
111
+ confidence = item.get('confidence', 1.0)
112
+ output_lines.append(f"| {text} | {confidence:.2%} |")
113
+ elif isinstance(item, (list, tuple)) and len(item) >= 2:
114
+ # If result is like [(bbox, (text, confidence)), ...]
115
+ text = item[1][0] if isinstance(item[1], (list, tuple)) else str(item[1])
116
+ confidence = item[1][1] if isinstance(item[1], (list, tuple)) and len(item[1]) > 1 else 1.0
117
+ output_lines.append(f"| {text} | {confidence:.2%} |")
118
+ else:
119
+ # Fallback: just show the item
120
+ output_lines.append(f"| {str(item)} | N/A |")
121
+ else:
122
+ output_lines.append(f"| {str(result)} | N/A |")
123
 
124
+ return "\n".join(output_lines)
125
 
126
  """ if not result or len(result) == 0:
127
  return "No text detected in the image."
 
170
  examples = [
171
  ['cco.jpg'],
172
  ['cnt.jpg'],
 
173
  ['maj.jpg'],
174
  ['mir.jpg'],
175
  ['ote.jpg'],
 
181
  ### Example Images:
182
  | Image | Language | Text |
183
  |-------|----------|-------------|
184
+ | cco.jpg | Comaltepec Chinantec | jo̱ dsʉꞌ dseaˋ íˋ cajɨ́ɨmˉbre uíiꞌ˜ e dseeˉ e caꞌéerˋ do, co̱ꞌ cajíimˉ búꞌˆ quiáꞌrˉ írˋ, co̱ꞌ i̱ búꞌˆ do caféꞌˋreꞌ laco̱ꞌ féꞌˋ dseabˋ, |
185
+ | cnt.jpg | Tepetotutla Chiantec | JMƗG₄ JË₁CA₂TÓ'₂ BÁ₄ LA₂ CHONG₂ JNIOG₄. MA₂NEI'₂ BÁ₄ 'NIA'₂, JÁ₅ JAN₂ I₂'ŊIA₅₄ CRISTO. RË₂NË́₃ NË́₃, JUƗN₅ BÁ₄ I₂MA₂CA₂RË₃JNIÁ₂ I₂'ŊIA₅₄ CRISTO. JAUN₂ BÁ₄ LË₃ NE₄ JNIOG₄ A₂JA₂QUIÁN₃ JMƗG₄ JË₁CA₂TÓ'₂. I₂CA₂'UƗN₂ JË₄ QUIÁN₂ JNIOG₄ BÁ₄ 'ÉI₂. DSÓN'₂ BÁ₄ DSAU₅, |
186
  | cuc.jpg | Usila Chinantec | Zapotec text from Oaxaca region |
187
  | maj.jpg | Mazatec, Jalapa de Díaz | Kui xi já maña̱ xi ngakjá ku̱a̱kúya ni xi ts'e̱ Nti̱a̱ná. Kj'a̱í ni xi ku̱a̱kúyanu̱u, kui xi ts'i̱ínkatsúnnu̱u. Najmi ts'i̱ínkie yjoho̱ nga Nda̱ Nti̱a̱ná xi ts'asjejihi̱n. B'a̱ ts'ín ki̱tsa̱ ts'i̱ín nibánehe̱ ra̱ yjoho̱ nga n'e̱kje. Nkjin xi i̱ncha ts'i̱ín ni xi i̱ncha ts'ín jóo̱, ni xi tu̱ subahá maná. |
188
  | mir.jpg | Isthmus Mixe | Cab jaduhṉ yhahixøꞌøy coo jaꞌa naam̱dägøꞌøbä tiúnät wiindsǿṉ maa jaꞌa Diostøjcän, coo jaduhṉ ñäꞌä niguiumayǿøjät. |
189
+ | ote.jpg | Mezquital Otomi | ma'ueque ma mbʉihʉ. Nɛ gätho gahʉ dyʉ mbäją gahʉ bi 'dac ma ts |
190
  """
191
 
192
  css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;} .output_markdown {min-height: 30rem !important;}"