Spaces:

hkai20000
/

ocrAPP

Sleeping

App Files Community

hkai20000 committed on Jan 31

Commit

92a1e4b

verified ·

1 Parent(s): e3a5f01

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -12

main.py CHANGED Viewed

@@ -198,35 +198,51 @@ def extract_text_structured(result) -> str:
     """
     Extract text from docTR result preserving logical structure.
     Groups text blocks by vertical position for better table handling.
     """
-    all_words = []
     for page in result.pages:
         for block in page.blocks:
             for line in block.lines:
-                line_text = ""
-                min_y = float('inf')
                 for word in line.words:
-                    line_text += word.value + " "
-                    min_y = min(min_y, word.geometry[0][1])
                 if line_text.strip():
-                    all_words.append({
                         'text': line_text.strip(),
                         'y': min_y,
-                        'x': line.geometry[0][0] if hasattr(line, 'geometry') else 0
                     })
-    all_words.sort(key=lambda w: (round(w['y'] * 20) / 20, w['x']))
     result_text = ""
     prev_y = -1
-    for word_info in all_words:
-        current_y_group = round(word_info['y'] * 20) / 20
         if prev_y != -1 and current_y_group != prev_y:
             result_text += "\n"
-        result_text += word_info['text'] + " "
         prev_y = current_y_group
     return result_text.strip()

     """
     Extract text from docTR result preserving logical structure.
     Groups text blocks by vertical position for better table handling.
+    Sorts words within each line by x-position (left to right).
     """
+    all_lines = []
     for page in result.pages:
         for block in page.blocks:
             for line in block.lines:
+                # Collect all words with their positions
+                words_in_line = []
                 for word in line.words:
+                    words_in_line.append({
+                        'text': word.value,
+                        'x': word.geometry[0][0],  # x position (left edge)
+                        'y': word.geometry[0][1]   # y position (top edge)
+                    })
+                if not words_in_line:
+                    continue
+                # Sort words by x position (left to right)
+                words_in_line.sort(key=lambda w: w['x'])
+                # Build line text from sorted words
+                line_text = " ".join([w['text'] for w in words_in_line])
                 if line_text.strip():
+                    min_y = min(w['y'] for w in words_in_line)
+                    min_x = min(w['x'] for w in words_in_line)
+                    all_lines.append({
                         'text': line_text.strip(),
                         'y': min_y,
+                        'x': min_x
                     })
+    # Sort lines by y position (top to bottom), then x (left to right for same row)
+    all_lines.sort(key=lambda l: (round(l['y'] * 20) / 20, l['x']))
+    # Build final text with line breaks between different y-groups
     result_text = ""
     prev_y = -1
+    for line_info in all_lines:
+        current_y_group = round(line_info['y'] * 20) / 20
         if prev_y != -1 and current_y_group != prev_y:
             result_text += "\n"
+        result_text += line_info['text'] + " "
         prev_y = current_y_group
     return result_text.strip()