"""
Pipeline configuration — model IDs, paths, thresholds.
"""
from pathlib import Path

# ── Paths ──────────────────────────────────────────────────────────
# The project root is the grandparent directory of this file's resolved
# path; the working directories below all sit directly beneath it.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
DATA_DIR, MODELS_DIR, OUTPUT_DIR = (
    PROJECT_ROOT / subdir for subdir in ("data", "models", "output")
)

# Location of the existing "x_box" data. The /mnt/c mount is preferred when
# it is present (presumably WSL exposing the Windows C: drive — confirm);
# otherwise the native Windows drive path is used.
if Path("/mnt/c").exists():
    XBOX_DATA = Path("/mnt/c/x_box")
else:
    XBOX_DATA = Path("C:/x_box")

# ── Embedding model (MTEB #1 under 1B) ────────────────────────────
# Primary embedding model ID, followed by the alternative kept as a fallback.
# NOTE(review): when the fallback is actually selected is up to the consumer
# of these constants; it is not decided in this module.
EMBEDDING_MODEL = "Qwen/Qwen3-Embedding-0.6B"
EMBEDDING_FALLBACK = "dunzhang/stella_en_400M_v5"

# Embedding vector width that goes with the model configuration above.
# NOTE(review): confirm the fallback model is loaded with the same dimension.
EMBEDDING_DIM = 1024

# ── Classification models (CardiffNLP Twitter-RoBERTa suite) ──────
# Maps each classification task name to the model ID used for that task.
CLASSIFIER_MODELS = dict(
    sentiment="cardiffnlp/twitter-roberta-base-sentiment-latest",
    emotion="cardiffnlp/twitter-roberta-base-emotion",
    offensive="cardiffnlp/twitter-roberta-base-offensive",
    irony="cardiffnlp/twitter-roberta-base-irony",
    hate="cardiffnlp/twitter-roberta-base-hate-multiclass-latest",
)

# ── Toxicity model ────────────────────────────────────────────────
# Stand-alone toxicity classifier; configured separately from the
# task → model mapping above.
TOXICITY_MODEL = "s-nlp/roberta_toxicity_classifier"

# ── Senator data sources ──────────────────────────────────────────
# Raw-file URLs into the unitedstates/congress-legislators GitHub repository
# (main branch). Each literal is split as host / repository path / file name.
CONGRESS_LEGISLATORS_URL = (
    "https://raw.githubusercontent.com/"
    "unitedstates/congress-legislators/main/"
    "legislators-social-media.yaml"
)
CONGRESS_LEGISLATORS_CURRENT_URL = (
    "https://raw.githubusercontent.com/"
    "unitedstates/congress-legislators/main/"
    "legislators-current.yaml"
)

# Dataset identifier for the senator tweet corpus.
# NOTE(review): looks like a Hugging Face Hub dataset ID — confirm with the loader.
SENATOR_TWEETS_DATASET = "m-newhauser/senator-tweets"

# ── Behavioral thresholds ─────────────────────────────────────────
# Minutes of inactivity after which a new session is considered to begin.
SESSION_GAP_MINUTES = 30

# Hours (UTC) that bound the "night" window.
# NOTE(review): 00–06 UTC falls in the evening for US time zones — confirm
# that UTC, rather than local time, is what the night-activity metric wants.
NIGHT_START_HOUR, NIGHT_END_HOUR = 0, 6

# Length, in minutes, of the window used for burst detection.
BURST_WINDOW_MINUTES = 60

# ── Virulence score weights ───────────────────────────────────────
# Per-component weights for the virulence score, keyed by component name.
# The eight weights add up to 1.0; keep that true when retuning them.
VIRULENCE_WEIGHTS = dict(
    sentiment_negative=0.15,
    emotion_anger=0.20,
    offensive=0.20,
    toxicity=0.15,
    hate=0.10,
    irony=0.05,
    engagement_controversy=0.10,
    burst_bonus=0.05,
)

# ── Compulsion score weights ──────────────────────────────────────
# Per-component weights for the compulsion score, keyed by component name.
# The seven weights add up to 1.0; keep that true when retuning them.
COMPULSION_WEIGHTS = dict(
    activity=0.20,
    burstiness=0.25,
    night_activity=0.15,
    session_intensity=0.15,
    reply_reactivity=0.10,
    repetition=0.10,
    emoji_media_sparsity=0.05,
)

# ── Batch sizes ───────────────────────────────────────────────────
# Batch sizes for the embedding and classification stages; the embedding
# stage is configured with the smaller batch.
EMBEDDING_BATCH_SIZE = 16
CLASSIFICATION_BATCH_SIZE = 32