| import re | |
# Heuristic spam signatures, compiled once at import time. A message is
# classified as spam as soon as any one of them matches.
# NOTE(review): the non-ASCII literals below look mis-encoded (mojibake); they
# are reproduced exactly as found and should be checked against the original
# source text before being relied on.
_SPAM_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Links that go through one of the listed URL-shortener domains.
        r'(http|https)://(bit\.ly|me2\.kr|buly\.kr)[^\s]*',
        # Two or more digits directly followed by '%' or a listed symbol.
        r'\d{2,}[%โโ]',
        # Three or more bracket-like characters in a row.
        r'[(){}<>]{3,}',
        # Fixed phrases treated as spam markers.
        r'(์ํผ๊ฐ๋ฏธ|์ ๋ฌผ|์์ต|๊ฐ์ฌํฉ๋๋ค|์นด์นด์คํก|๋ชจ์ง|์ถํ|๊ต์ก|์์ ์ |์คํ์ด๋|ํญ๋ฑ|๋ค์์ฃผ๋ ์ด์ด์|์์น)',
        # Two or more consecutive '!' / '?' characters.
        r'[!?]{2,}',
        # Four or more capital letters, optionally separated by whitespace.
        # Every repetition must contain a capital, so a run of plain
        # whitespace on its own does not count as "shouting".
        r'[A-Z](?:\s*[A-Z]){3,}',
        # A number followed by a unit/percent marker and then at least one
        # character outside the listed range.
        r'(\d[ํด์ ]%|\d+์(?:\s*๊ฐ)?|\d+[\s-]*์)[^๊ฐ-ํฃ]+',
        # The same word twice in a row. The trailing \b stops a word from
        # "repeating" as the mere prefix of a longer one ("the theory").
        r'\b(\w+)\W+\1\b',
    )
)


def is_spam(text):
    """Return True when *text* matches at least one spam heuristic.

    Args:
        text: The message to classify. ``None`` and the empty string are
            treated as "not spam".

    Returns:
        bool: True if any pattern in ``_SPAM_PATTERNS`` is found anywhere in
        *text*, otherwise False.
    """
    if not text:
        # Nothing to scan; also avoids a TypeError from re on None.
        return False
    return any(pattern.search(text) for pattern in _SPAM_PATTERNS)