| """ |
| Pipeline configuration — model IDs, paths, thresholds. |
| """ |
| from pathlib import Path |
|
|
| |
# Project directory layout.
# PROJECT_ROOT is two levels above this file's resolved location, i.e. the
# parent of the directory that contains this module. The data, models and
# output directories are direct children of it.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR = PROJECT_ROOT / "data"
MODELS_DIR = PROJECT_ROOT / "models"
OUTPUT_DIR = PROJECT_ROOT / "output"
|
|
| |
# Location of the "x_box" data directory.
# The choice is made once, at import time: when /mnt/c exists the path under
# it is used, otherwise the C:/ drive-letter form is used.
# NOTE(review): /mnt/c is presumably the WSL mount point of the Windows C:
# drive - confirm this is the only non-Windows environment to support.
XBOX_DATA = (
    Path("/mnt/c/x_box")
    if Path("/mnt/c").exists()
    else Path("C:/x_box")
)
|
|
| |
# Embedding model settings. Model names are "<org>/<name>" identifiers.
# NOTE(review): the identifiers look like Hugging Face Hub repo IDs, and
# EMBEDDING_DIM is presumably the vector size produced by EMBEDDING_MODEL -
# confirm against the loading code before changing either value, and check
# that EMBEDDING_FALLBACK yields vectors of the same size.
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-0.6B"
EMBEDDING_DIM = 1024
EMBEDDING_FALLBACK = "dunzhang/stella_en_400M_v5"
|
|
| |
# Maps each classification task name to the identifier of the model used for
# it. Keys are the task labels; values are "<org>/<name>" model identifiers.
# NOTE(review): other settings in this module use keys such as "offensive",
# "hate" and "irony" too - presumably they refer to these tasks; confirm
# before renaming any key.
CLASSIFIER_MODELS = {
    "sentiment": "cardiffnlp/twitter-roberta-base-sentiment-latest",
    "emotion": "cardiffnlp/twitter-roberta-base-emotion",
    "offensive": "cardiffnlp/twitter-roberta-base-offensive",
    "irony": "cardiffnlp/twitter-roberta-base-irony",
    "hate": "cardiffnlp/twitter-roberta-base-hate-multiclass-latest",
}
|
|
| |
# Identifier of the toxicity classifier, kept separate from CLASSIFIER_MODELS.
TOXICITY_MODEL = "s-nlp/roberta_toxicity_classifier"
|
|
| |
# External data sources.
# Both URLs point at raw YAML files on the "main" branch of the
# unitedstates/congress-legislators GitHub repository. Each URL is built from
# two adjacent string literals, which Python joins at compile time.
# Note that CONGRESS_LEGISLATORS_URL is the *social media* file, while
# CONGRESS_LEGISLATORS_CURRENT_URL is the current-legislators file.
CONGRESS_LEGISLATORS_URL = (
    "https://raw.githubusercontent.com/unitedstates/congress-legislators"
    "/main/legislators-social-media.yaml"
)
CONGRESS_LEGISLATORS_CURRENT_URL = (
    "https://raw.githubusercontent.com/unitedstates/congress-legislators"
    "/main/legislators-current.yaml"
)
# NOTE(review): looks like a Hugging Face dataset ID ("<user>/<dataset>") -
# confirm against the code that loads it.
SENATOR_TWEETS_DATASET = "m-newhauser/senator-tweets"
|
|
| |
# Time-based thresholds; the units (minutes, hour of day) are given by the
# constant names.
# NOTE(review): whether the night window [NIGHT_START_HOUR, NIGHT_END_HOUR]
# is end-inclusive, and which time zone the hours refer to, is decided by the
# consuming code - confirm there before changing these.
SESSION_GAP_MINUTES = 30
NIGHT_START_HOUR = 0
NIGHT_END_HOUR = 6
BURST_WINDOW_MINUTES = 60
|
|
| |
# Per-component weights for the virulence score, keyed by component name.
# The eight weights sum to 1.0; keep that total if any value is changed.
# NOTE(review): how the components are combined (presumably a weighted sum)
# is defined by the scoring code, not here - confirm before rebalancing.
VIRULENCE_WEIGHTS = {
    "sentiment_negative": 0.15,
    "emotion_anger": 0.20,
    "offensive": 0.20,
    "toxicity": 0.15,
    "hate": 0.10,
    "irony": 0.05,
    "engagement_controversy": 0.10,
    "burst_bonus": 0.05,
}
|
|
| |
# Per-component weights for the compulsion score, keyed by component name.
# The seven weights sum to 1.0; keep that total if any value is changed.
# NOTE(review): how the components are combined (presumably a weighted sum)
# is defined by the scoring code, not here - confirm before rebalancing.
COMPULSION_WEIGHTS = {
    "activity": 0.20,
    "burstiness": 0.25,
    "night_activity": 0.15,
    "session_intensity": 0.15,
    "reply_reactivity": 0.10,
    "repetition": 0.10,
    "emoji_media_sparsity": 0.05,
}
|
|
| |
# Batch sizes for the two inference stages named by the constants.
# NOTE(review): presumably the number of texts per forward pass - confirm
# against the inference code and tune for available memory.
CLASSIFICATION_BATCH_SIZE = 32
EMBEDDING_BATCH_SIZE = 16
|
|