heerjtdev committed on
Commit
bc9a5d0
·
verified ·
1 Parent(s): fdafee6

Update working_yolo_pipeline.py

Browse files
Files changed (1) hide show
  1. working_yolo_pipeline.py +4 -3
working_yolo_pipeline.py CHANGED
@@ -759,10 +759,11 @@ def extract_native_words_and_convert(fitz_page, scale_factor: float = 2.0) -> li
759
  # --- FIX: ROBUST SANITIZATION ---
760
  # 1. Encode to UTF-8 ignoring errors (strips surrogates)
761
  # 2. Decode back to string
762
- cleaned_word_bytes = word.encode('utf-8', 'ignore')
763
- cleaned_word = cleaned_word_bytes.decode('utf-8')
 
764
 
765
- cleaned_word = cleaned_word.strip()
766
  if not cleaned_word: continue
767
 
768
  x1_pix = int(x1 * scale_factor)
 
759
  # --- FIX: ROBUST SANITIZATION ---
760
  # 1. Encode to UTF-8 ignoring errors (strips surrogates)
761
  # 2. Decode back to string
762
+ # cleaned_word_bytes = word.encode('utf-8', 'ignore')
763
+ # cleaned_word = cleaned_word_bytes.decode('utf-8')
764
+ cleaned_word = word.encode('utf-8', 'ignore').decode('utf-8').strip()
765
 
766
+ # cleaned_word = cleaned_word.strip()
767
  if not cleaned_word: continue
768
 
769
  x1_pix = int(x1 * scale_factor)