Yaz Hobooti committed on
Commit
2874826
·
1 Parent(s): bae9f7f

Fix indentation error in find_misspell_boxes function

Browse files
Files changed (1) hide show
  1. pdf_comparator.py +8 -8
pdf_comparator.py CHANGED
@@ -131,25 +131,25 @@ def find_misspell_boxes(img: Image.Image) -> List[Box]:
131
  try:
132
  spell = SpellChecker()
133
  data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
134
- except Exception:
135
- return []
136
- n = len(data.get("text", []))
137
  boxes: List[Box] = []
138
- for i in range(n):
139
  text = data["text"][i]
140
  if not text:
141
- continue
142
  token = normalize_token(text)
143
  if len(token) < 2:
144
- continue
145
  if token in spell:
146
- continue
147
  left = data.get("left", [0])[i]
148
  top = data.get("top", [0])[i]
149
  width = data.get("width", [0])[i]
150
  height= data.get("height",[0])[i]
151
  if width <= 0 or height <= 0:
152
- continue
153
  boxes.append(Box(top, left, top+height, left+width, width*height))
154
  return boxes
155
 
 
131
  try:
132
  spell = SpellChecker()
133
  data = pytesseract.image_to_data(img, output_type=pytesseract.Output.DICT)
134
+ except Exception:
135
+ return []
136
+ n = len(data.get("text", []))
137
  boxes: List[Box] = []
138
+ for i in range(n):
139
  text = data["text"][i]
140
  if not text:
141
+ continue
142
  token = normalize_token(text)
143
  if len(token) < 2:
144
+ continue
145
  if token in spell:
146
+ continue
147
  left = data.get("left", [0])[i]
148
  top = data.get("top", [0])[i]
149
  width = data.get("width", [0])[i]
150
  height= data.get("height",[0])[i]
151
  if width <= 0 or height <= 0:
152
+ continue
153
  boxes.append(Box(top, left, top+height, left+width, width*height))
154
  return boxes
155