# ...existing code...
import re
import sys
from pathlib import Path

# A word followed by one or more whitespace-separated repeats of itself
# ("word word", "the The THE"). Compiled once so the loop below does not
# rebuild or re-look-up the pattern for every file. IGNORECASE makes the
# backreference match regardless of letter case.
REPEATED_WORD_RE = re.compile(r"\b(\w+)(?:\s+\1\b)+", flags=re.IGNORECASE)


def find_repeated_words(text):
    """Return the first word of every run of adjacent repeated words in *text*.

    Each run ("is is", "the The THE") contributes one entry: the word exactly
    as it is spelled at the start of the run. Words are compared
    case-insensitively and may be separated by any whitespace, including
    newlines. Returns an empty list when there are no repeats.

    >>> find_repeated_words("The the cat sat sat down")
    ['The', 'sat']
    """
    return REPEATED_WORD_RE.findall(text)


p = Path("data/cache")
# sorted() makes the report order reproducible; glob order is otherwise
# whatever the filesystem happens to return.
for f in sorted(p.glob("*.txt")):
    try:
        text = f.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError) as exc:
        # A directory named "*.txt", an unreadable file, or non-UTF-8 bytes
        # should not abort the scan of the remaining files.
        print(f"{f.name} skipped: {exc}", file=sys.stderr)
        continue
    # find repeated adjacent words like "word word" sequences
    matches = find_repeated_words(text)
    if matches:
        print(f"{f.name} has repeated words sample: {matches[:10]}")
    else:
        print(f"{f.name} looks ok")