Spaces:
Sleeping
Sleeping
| import re | |
# Lines containing any of these markers are discarded entirely. Matching is
# case-insensitive. Single backslashes are required inside the raw string:
# a doubled backslash would match a literal "\" instead of a digit / dot.
_NOISE_RE = re.compile(
    r'(page \d+|www\.|linkedin|facebook|youtube|subscribe|@)',
    re.IGNORECASE,
)

# Two or more consecutive whitespace characters (spaces, tabs, ...).
_WHITESPACE_RUN_RE = re.compile(r'\s{2,}')


def clean_text(text: str) -> str:
    """Return *text* with noisy lines removed and inner whitespace collapsed.

    Each line is stripped of leading/trailing whitespace and then dropped
    when any of the following holds:

    * it is shorter than 5 characters (this includes blank lines);
    * it contains a noise marker: ``page <digits>``, ``www.``, ``linkedin``,
      ``facebook``, ``youtube``, ``subscribe`` or ``@`` (case-insensitive);
    * it is entirely upper-case and shorter than 40 characters
      (presumably short headings -- confirm against the callers' data).

    In every surviving line, runs of two or more whitespace characters are
    replaced by a single space.

    Args:
        text: The raw text to clean; split with ``str.splitlines``.

    Returns:
        The surviving lines joined with ``"\n"``; an empty string when no
        line survives.
    """
    cleaned = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        # The length test also rejects empty lines, so no separate check.
        if len(line) < 5:
            continue
        if _NOISE_RE.search(line):
            continue
        if line.isupper() and len(line) < 40:
            continue
        # Length checks above intentionally run on the un-collapsed line,
        # preserving the original filter order.
        cleaned.append(_WHITESPACE_RUN_RE.sub(' ', line))
    return "\n".join(cleaned)