File size: 416 Bytes
54bef2f | 1 2 3 4 5 6 7 8 9 10 11 12 13 | # ...existing code...
import re
from pathlib import Path
# Matches a word followed by one or more repeats of itself, separated only by
# whitespace (including newlines) and compared case-insensitively,
# e.g. "the the" or "Is is is". Compiled once so the loop below reuses it.
REPEATED_WORD_RE = re.compile(r"\b(\w+)(?:\s+\1\b)+", flags=re.IGNORECASE)


def find_repeated_words(text):
    """Return the leading word of every run of adjacent repeated words.

    Args:
        text: The text to scan.

    Returns:
        A list with one entry per run of repeated words, in order of
        appearance. Each entry is the first occurrence of the word as it is
        written in *text*; a run such as "go go go" yields a single "go".
        The list is empty when no repeats are found.
    """
    return REPEATED_WORD_RE.findall(text)


p = Path("data/cache")
# Sort so the report order is stable across runs and filesystems.
for f in sorted(p.glob("*.txt")):
    try:
        text = f.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # One unreadable or non-UTF-8 file should not abort the whole scan.
        print(f"{f.name} could not be read: {exc}")
        continue
    matches = find_repeated_words(text)
    if matches:
        # Only show the first few hits to keep the report short.
        print(f"{f.name} has repeated words sample: {matches[:10]}")
    else:
        print(f"{f.name} looks ok")