Spaces:
Sleeping
Sleeping
Update pdf_utils_finalclean_NYmac_final.py
Browse files
pdf_utils_finalclean_NYmac_final.py
CHANGED
|
@@ -80,7 +80,7 @@ def clean_text(text: str) -> str:
|
|
| 80 |
while j < len(words) and is_entirely_double_letters(words[j]):
|
| 81 |
j += 1
|
| 82 |
run_len = j - i
|
| 83 |
-
if run_len >=
|
| 84 |
i = j
|
| 85 |
continue
|
| 86 |
else: # 1- or 2-word run → keep, dedup for readability
|
|
|
|
| 80 |
while j < len(words) and is_entirely_double_letters(words[j]):
|
| 81 |
j += 1
|
| 82 |
run_len = j - i
|
| 83 |
+
if run_len >= 5: # ≥5 consecutive doubled words → drop run
|
| 84 |
i = j
|
| 85 |
continue
|
| 86 |
else: # run of 1–4 words (below the drop threshold) → keep, dedup for readability
|