# freeman.genie777@gmail.com
# readme
# 2b9435d
import re
# Heuristic spam signatures. A message is classified as spam as soon as any
# single pattern matches. They are compiled once at import time so repeated
# calls only pay for the searches.
#
# The Hangul and arrow literals must be stored as real UTF-8 text: a
# mis-decoded character class such as the 가-힣 range turns into an invalid
# (reversed) range and makes ``re`` raise instead of matching.
_SPAM_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Links through URL shorteners associated with scam websites.
        r'(http|https)://(bit\.ly|me2\.kr|buly\.kr)[^\s]*',
        # Two or more digits followed by a percent sign or an up/down arrow.
        r'\d{2,}[%↑↓]',
        # Three or more consecutive parentheses/brackets/braces.
        r'[(){}<>]{3,}',
        # Korean phrases commonly found in spam messages.
        r'(슈퍼개미|선물|수익|감사합니다|카카오톡|모집|축하|교육|영업점|오픈초대|폭등|다음주도 이어서|상승)',
        # Repeated exclamation marks or question marks.
        r'[!?]{2,}',
        # Runs of four or more capital letters and/or whitespace characters.
        # NOTE(review): this also matches four whitespace characters with no
        # capital letter at all; left as-is because the intent is unclear.
        r'[A-Z\s]{4,}',
        # A percentage, hour ("시"/"시간") or month ("월") expression that is
        # immediately followed by at least one non-Hangul character.
        r'(\d[해선]%|\d+시(?:\s*간)?|\d+[\s-]*월)[^가-힣]+',
        # The same whole word twice in a row, separated only by non-word
        # characters. The trailing \b keeps a mere prefix ("go google")
        # from counting as a repetition.
        r'(\b\w+\b)\W+\1\b',
    )
)


def is_spam(text: str) -> bool:
    """Return True if *text* matches any of the spam heuristics.

    The check is a plain "any pattern matches" test over ``_SPAM_PATTERNS``;
    no scoring or weighting is applied.

    Args:
        text: The message to classify.

    Returns:
        True when at least one pattern is found anywhere in *text*,
        otherwise False (including for the empty string).

    Raises:
        TypeError: If *text* is not a ``str`` (raised by the ``re`` module).
    """
    return any(pattern.search(text) for pattern in _SPAM_PATTERNS)