Instructions to use M-Arjun/SpamShield with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Scikit-learn
How to use M-Arjun/SpamShield with Scikit-learn:
from huggingface_hub import hf_hub_download import joblib model = joblib.load( hf_hub_download("M-Arjun/SpamShield", "sklearn_model.joblib") ) # only load pickle files from sources you trust # read more about it here https://skops.readthedocs.io/en/stable/persistence.html - Notebooks
- Google Colab
- Kaggle
| import os | |
| from pathlib import Path | |
| # Model paths | |
| BASE_DIR = Path(__file__).resolve().parent | |
| MODEL_DIR = str(BASE_DIR) | |
| BINARY_MODEL_PATH = os.path.join(MODEL_DIR, "binary_model.pkl") | |
| CATEGORY_MODEL_PATH = os.path.join(MODEL_DIR, "category_model.pkl") | |
| VECTORIZER_PATH = os.path.join(MODEL_DIR, "vectorizer.pkl") | |
| METADATA_PATH = os.path.join(MODEL_DIR, "metadata.json") | |
| # Vectorizer settings (TF-IDF word + char, lite) | |
| WORD_MAX_FEATURES = 10000 | |
| CHAR_MAX_FEATURES = 5000 | |
| MIN_DF = 3 | |
| SUBLINEAR_TF = True | |
| # Prediction settings | |
| SPAM_THRESHOLD = 0.65 | |
| SHORT_TEXT_WORD_COUNT = 2 | |
| SHORT_TEXT_THRESHOLD = 0.9 | |
| VERY_SHORT_TEXT_WORD_COUNT = 1 | |
| VERY_SHORT_TEXT_THRESHOLD = 0.96 | |
| LONG_TEXT_WORD_THRESHOLD = 80 | |
| CHUNK_MAX_WORDS = 40 | |
| MAX_CHUNKS = 24 | |
| BLOCKED_URL_DOMAINS = { | |
| "suspicious-free-prize-now.biz", | |
| } | |
| # Precision/recall tuning during training | |
| TARGET_MIN_PRECISION = 0.98 | |