Text Classification
Scikit-learn
ONNX
Spam
Spam-Categoriser
SpamShield / 0.4 /config.py
M-Arjun's picture
0.4 and 0.4 - lite
0ec6609 verified
import os
from pathlib import Path
# Model artifact locations.
# Everything is anchored to the directory that contains this file, so the
# resulting paths do not depend on the process's current working directory.
BASE_DIR = Path(__file__).resolve().parent
MODEL_DIR = str(BASE_DIR)

# Each artifact path is exposed as a plain string (not a Path object).
BINARY_MODEL_PATH = str(BASE_DIR / "binary_model.pkl")
CATEGORY_MODEL_PATH = str(BASE_DIR / "category_model.pkl")
VECTORIZER_PATH = str(BASE_DIR / "vectorizer.pkl")
METADATA_PATH = str(BASE_DIR / "metadata.json")
# Vectorizer settings for the "lite" TF-IDF setup (word + char features).
# NOTE(review): the names mirror scikit-learn TfidfVectorizer parameters
# (max_features, min_df, sublinear_tf); presumably they are passed straight
# through by the training code -- confirm there, it is not visible here.
SUBLINEAR_TF = True
MIN_DF = 3
# Separate feature caps for the word-level and char-level vocabularies.
WORD_MAX_FEATURES = 10_000
CHAR_MAX_FEATURES = 5_000
# Prediction settings.
# NOTE(review): how these values are consumed is not visible in this file;
# the per-constant notes below are inferred from the names -- confirm against
# the prediction code before relying on them.

# Default decision threshold (presumably applied to the spam score).
SPAM_THRESHOLD = 0.65

# Stricter thresholds paired with word-count limits for short inputs.
VERY_SHORT_TEXT_WORD_COUNT = 1
VERY_SHORT_TEXT_THRESHOLD = 0.96
SHORT_TEXT_WORD_COUNT = 2
SHORT_TEXT_THRESHOLD = 0.9

# Long-input handling: word count that marks a text as long, plus chunk
# size and chunk count limits (presumably used to split long texts).
LONG_TEXT_WORD_THRESHOLD = 80
CHUNK_MAX_WORDS = 40
MAX_CHUNKS = 24

# URL domains on the blocklist; kept as a mutable set.
BLOCKED_URL_DOMAINS = {"suspicious-free-prize-now.biz"}
# Precision/recall tuning during training.
# NOTE(review): presumably the minimum precision the training step must reach
# when selecting an operating point -- the consumer is not visible in this
# file, so confirm against the training code.
TARGET_MIN_PRECISION = 0.98