File size: 517 Bytes
c551752
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import re

# Text cleaning function: lowercases the text, removes every character that is not a-z, 0-9 or whitespace, and normalizes whitespace
def simple_text_clean(text: str) -> str:
    """Return a lowercased, alphanumeric-only, whitespace-normalized copy of *text*.

    The input is lowercased first, then every character other than ``a-z``,
    ``0-9`` or whitespace is deleted (not replaced by a space, so
    ``"don't"`` becomes ``"dont"``). Finally, each run of whitespace is
    collapsed to a single space and leading/trailing whitespace is trimmed.

    Any value that is not a ``str`` (e.g. ``None`` or a number) yields ``''``.
    """
    # Guard clause: anything that is not a string cleans to an empty string.
    if not isinstance(text, str):
        return ''

    lowered = text.lower()
    # Lowercasing happens before filtering so that ASCII capitals survive it.
    alnum_only = re.sub(r'[^a-z0-9\s]', '', lowered)
    collapsed = re.sub(r'\s+', ' ', alnum_only)
    return collapsed.strip()

# Human-readable labels for predictions; the list position is the class id.
CLASS_NAMES = [
    'Legitimate',  # class 0
    'Phishing',  # class 1
]