"""
DeepSeek-powered tweet labeler — generates training labels for custom
classification dimensions that off-the-shelf models don't cover well.

Target dimensions (not well-served by CardiffNLP):
1. Ragebait virulence (0-1) — is this tweet designed to provoke outrage?
2. Compulsive engagement bait (0-1) — rhetorical questions, polls, "who agrees?"
3. Tribal signaling (0-1) — in-group/out-group framing, us-vs-them
4. Performative outrage (0-1) — outrage as spectacle vs genuine anger
5. Epistemic manipulation (0-1) — misleading framing, cherry-picking, straw-manning
6. Agency language (0-1) — active, agentic phrasing vs passive/victimhood framing

Uses DeepSeek as annotator, then human-validates a sample, then trains
SetFit classifiers to distill into deterministic models.
"""
import json
import logging
import os
import time
from pathlib import Path
from typing import Optional

import pandas as pd
from tqdm import tqdm

from .config import DATA_DIR

log = logging.getLogger(__name__)

# DeepSeek connection settings.
# The API key is a secret and must not live in source control: it is read from
# the DEEPSEEK_API_KEY environment variable and is an empty string when that
# variable is unset.
# NOTE(review): a literal key was previously committed on this line; it remains
# in version-control history and should be revoked/rotated.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
DEEPSEEK_BASE_URL = "https://api.deepseek.com"
DEEPSEEK_MODEL = "deepseek-chat"

# The labeling prompt — structured to produce consistent JSON output.
# This is a str.format template with three placeholders: {tweet_text},
# {senator_name} and {party}. Because it is rendered with str.format, any
# literal brace added to the text must be doubled ({{ or }}).
# It requests six scores in [0.0, 1.0]: ragebait, engagement_bait,
# tribal_signal, performative_outrage, epistemic_manipulation, agency_language.
LABELING_PROMPT = """You are an expert annotator for political social media analysis.
Rate this tweet from a US senator on the following dimensions.
Return ONLY a JSON object with scores from 0.0 to 1.0.

Dimensions:
- ragebait: Is this tweet designed to provoke outrage or anger in readers? (0=informational, 1=pure ragebait)
- engagement_bait: Does it use tricks to drive engagement — rhetorical questions, polls, "who agrees?", calls to action? (0=none, 1=heavy)
- tribal_signal: Does it use us-vs-them framing, in-group/out-group language, partisan tribalism? (0=none, 1=strong)
- performative_outrage: Is the outrage theatrical/performative rather than substantive? (0=genuine/none, 1=pure performance)
- epistemic_manipulation: Does it mislead through cherry-picking, straw-manning, false equivalence, or decontextualization? (0=fair, 1=heavily manipulative)
- agency_language: Does the author use active, agentic language (I did, we will) vs passive/victimhood framing (they did this to us)? (0=passive/victim, 1=agentic)

Tweet: "{tweet_text}"

Senator: {senator_name} ({party})

Return ONLY valid JSON, no explanation:"""


class DeepSeekLabeler:
    """
    Use the DeepSeek API to generate soft labels for custom classification dimensions.

    Every tweet is scored on each key in ``DIMENSIONS`` with a float in [0, 1].
    These labels become training data for distilled SetFit classifiers.
    """

    # Score keys requested by LABELING_PROMPT; the single source of truth for
    # validation, the failure fallback and the summary log line.
    DIMENSIONS = (
        "ragebait",
        "engagement_bait",
        "tribal_signal",
        "performative_outrage",
        "epistemic_manipulation",
        "agency_language",
    )
    # Value used for a dimension missing from the reply, and for every
    # dimension when a call or parse fails.
    NEUTRAL_SCORE = 0.5
    # Number of processed tweets between intermediate checkpoint writes.
    CHECKPOINT_EVERY = 100

    def __init__(self, api_key: Optional[str] = None):
        """
        Create an OpenAI-compatible client pointed at the DeepSeek endpoint.

        Args:
            api_key: DeepSeek API key. When omitted, the module-level
                DEEPSEEK_API_KEY is used; it is resolved at call time so the
                secret is not embedded in the function signature.

        Raises:
            ImportError: If the ``openai`` package is not installed.
            ValueError: If no API key is available.
        """
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError("Install openai: pip install openai") from err

        if api_key is None:
            api_key = DEEPSEEK_API_KEY
        if not api_key:
            # Fail here rather than on every request, where the error would be
            # swallowed by label_tweet's best-effort fallback.
            raise ValueError(
                "A DeepSeek API key is required: pass api_key or set DEEPSEEK_API_KEY"
            )

        self.client = OpenAI(api_key=api_key, base_url=DEEPSEEK_BASE_URL)
        self.model = DEEPSEEK_MODEL

    @classmethod
    def _parse_labels(cls, content: str) -> dict:
        """
        Parse a model reply into a dict with one clamped score per dimension.

        Raises:
            ValueError: If the reply is not valid JSON (json.JSONDecodeError is
                a ValueError), is not a JSON object, or holds a non-numeric score.
            TypeError: If a score is of a type ``float()`` cannot convert.
        """
        # Replies may be wrapped in a markdown code fence or surrounded by
        # prose; keep only the outermost {...} span before decoding.
        start = content.find("{")
        end = content.rfind("}")
        if start != -1 and end > start:
            content = content[start:end + 1]

        labels = json.loads(content.strip())
        if not isinstance(labels, dict):
            raise ValueError("expected a JSON object of dimension scores")

        validated = {}
        for key in cls.DIMENSIONS:
            score = float(labels.get(key, cls.NEUTRAL_SCORE))
            if score != score:
                # NaN (accepted by json.loads) cannot be clamped with min/max.
                score = cls.NEUTRAL_SCORE
            validated[key] = max(0.0, min(1.0, score))
        return validated

    def label_tweet(self, text: str, senator_name: str = "", party: str = "") -> dict:
        """
        Label a single tweet.

        Args:
            text: Tweet text; only the first 500 characters are sent.
            senator_name: Author name inserted into the prompt ("Unknown" if empty).
            party: Author party inserted into the prompt ("Unknown" if empty).

        Returns:
            Dict mapping each key in ``DIMENSIONS`` to a float in [0, 1]. If the
            API call or parsing fails, every dimension is ``NEUTRAL_SCORE``.
        """
        prompt = LABELING_PROMPT.format(
            tweet_text=text[:500],  # Truncate very long tweets
            senator_name=senator_name or "Unknown",
            party=party or "Unknown",
        )

        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=200,
                temperature=0.1,  # Low temp for consistency
            )
            # An empty/None message body falls through to a JSON decode error.
            content = response.choices[0].message.content or ""
            return self._parse_labels(content)

        except Exception as e:
            # Deliberately broad: one bad tweet (network error, malformed
            # reply) must not abort a long batch. The failure is logged and
            # neutral scores are returned instead.
            log.warning("DeepSeek labeling failed for tweet: %s — %s", text[:50], e)
            return {key: self.NEUTRAL_SCORE for key in self.DIMENSIONS}

    def label_batch(
        self,
        df: pd.DataFrame,
        text_col: str = "text",
        senator_name: str = "",
        party: str = "",
        max_tweets: Optional[int] = None,
        delay: float = 0.1,
        save_path: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Label a batch of tweets and return one row of scores per tweet.

        Each output row holds the dimension scores, ``_index`` (position within
        the labeled subset), ``text`` (first 500 characters) and, when the input
        has a ``tweet_id`` column, ``tweet_id`` as a string.

        Args:
            df: DataFrame with tweets
            text_col: Name of the column holding the tweet text
            senator_name: Author name passed to every label_tweet call
            party: Author party passed to every label_tweet call
            max_tweets: Cap on number to label (for cost control); None means
                no cap and 0 labels nothing
            delay: Seconds between API calls (rate limiting)
            save_path: Parquet file for intermediate and final results. If it
                already exists, its rows are treated as the labels for the
                first rows of ``df`` and labeling resumes after them.

        Returns:
            DataFrame of labels, including any rows loaded from the checkpoint.
        """
        subset = df.head(max_tweets) if max_tweets is not None else df
        n = len(subset)
        log.info("Labeling %d tweets via DeepSeek...", n)

        checkpoint = Path(save_path) if save_path else None

        # Resume from checkpoint if exists
        results = []
        start_idx = 0
        if checkpoint is not None:
            # Create the directory up front so the first intermediate
            # checkpoint cannot fail after API calls have already been paid for.
            checkpoint.parent.mkdir(parents=True, exist_ok=True)
            if checkpoint.exists():
                existing = pd.read_parquet(checkpoint)
                start_idx = len(existing)
                results = existing.to_dict("records")
                log.info("Resuming from checkpoint: %d already labeled", start_idx)

        # Iterate only the unlabeled tail so the progress bar (which already
        # starts at `initial`) is not advanced a second time for skipped rows.
        remaining = subset.iloc[start_idx:]
        progress = tqdm(
            remaining.iterrows(),
            total=n,
            desc="DeepSeek labeling",
            initial=min(start_idx, n),
        )
        for i, (_, row) in enumerate(progress, start=start_idx):
            text = str(row[text_col])
            labels = self.label_tweet(text, senator_name=senator_name, party=party)
            labels["_index"] = i
            labels["text"] = text[:500]
            if "tweet_id" in row:
                labels["tweet_id"] = str(row["tweet_id"])
            results.append(labels)

            if checkpoint is not None and (i + 1) % self.CHECKPOINT_EVERY == 0:
                pd.DataFrame(results).to_parquet(checkpoint, index=False)
                log.info("Checkpoint saved: %d/%d", i + 1, n)

            if delay > 0:
                time.sleep(delay)

        result_df = pd.DataFrame(results)

        if checkpoint is not None:
            result_df.to_parquet(checkpoint, index=False)
            log.info("Labels saved to %s", save_path)

        log.info(
            "Labeling complete: %d tweets, %d dimensions",
            len(result_df), len(self.DIMENSIONS),
        )
        return result_df


def build_training_set(
    labeled_df: pd.DataFrame,
    dimension: str,
    threshold_positive: float = 0.7,
    threshold_negative: float = 0.3,
    max_per_class: int = 500,
) -> pd.DataFrame:
    """
    Convert soft DeepSeek labels into a binary training set for SetFit.

    Tweets scoring >= threshold_positive are positive examples (label 1).
    Tweets scoring <= threshold_negative are negative examples (label 0).
    Scores strictly between the thresholds are excluded as ambiguous.

    Args:
        labeled_df: Labeled tweets; must contain ``text`` and ``dimension`` columns.
        dimension: Name of the score column to binarize.
        threshold_positive: Inclusive lower bound for positive examples.
        threshold_negative: Inclusive upper bound for negative examples.
        max_per_class: Keep at most this many rows per class, taken in the
            order they appear in ``labeled_df``.

    Returns:
        Shuffled DataFrame (fixed seed) with columns ``text`` and ``label`` and
        a fresh 0..n-1 index.

    Raises:
        ValueError: If ``dimension`` is not a column, or if the thresholds
            overlap so that a row could land in both classes.
    """
    if dimension not in labeled_df.columns:
        raise ValueError(f"Dimension '{dimension}' not in labeled data")
    if threshold_negative >= threshold_positive:
        raise ValueError(
            "threshold_negative must be lower than threshold_positive "
            f"(got {threshold_negative} and {threshold_positive})"
        )

    scores = labeled_df[dimension]
    is_positive = scores >= threshold_positive
    is_negative = scores <= threshold_negative

    pos = labeled_df[is_positive].head(max_per_class).copy()
    neg = labeled_df[is_negative].head(max_per_class).copy()
    pos["label"] = 1
    neg["label"] = 0

    # Reset the index after shuffling so downstream consumers see a plain
    # 0..n-1 range instead of the permuted positions.
    training = (
        pd.concat([pos, neg], ignore_index=True)
        .sample(frac=1, random_state=42)
        .reset_index(drop=True)
    )

    # Count only rows that matched neither class; rows dropped by the
    # max_per_class cap are not ambiguous.
    n_ambiguous = int((~(is_positive | is_negative)).sum())
    log.info(
        "Training set for '%s': %d positive, %d negative (excluded %d ambiguous)",
        dimension, len(pos), len(neg), n_ambiguous,
    )
    return training[["text", "label"]]


def train_setfit_classifier(
    training_df: pd.DataFrame,
    dimension: str,
    base_model: str = "sentence-transformers/all-mpnet-base-v2",
    output_dir: Optional[str] = None,
) -> tuple:
    """
    Train a SetFit classifier on DeepSeek-generated labels.
    SetFit needs very few examples (8+ per class) to match full fine-tuning.

    The data is split 80/20 (fixed seed) into train and evaluation sets.

    Args:
        training_df: Training examples; no column mapping is passed to the
            trainer, so presumably ``text`` and ``label`` columns are expected
            (the shape build_training_set returns) — confirm against SetFit.
        dimension: Dimension name, used in log messages and the saved model's
            directory name.
        base_model: Sentence-transformer checkpoint to start from.
        output_dir: If given, the model is saved to ``<output_dir>/setfit_<dimension>``.

    Returns:
        Tuple ``(model, metrics)`` with the trained model and the result of
        ``trainer.evaluate()``.

    Raises:
        ValueError: If ``training_df`` has fewer than two rows, which cannot be
            split into train and evaluation sets.
    """
    from datasets import Dataset
    from setfit import SetFitModel, Trainer, TrainingArguments

    if len(training_df) < 2:
        raise ValueError(
            f"Need at least 2 training examples for '{dimension}', got {len(training_df)}"
        )

    # preserve_index=False keeps a shuffled/non-range DataFrame index from
    # being stored as an extra "__index_level_0__" column in the dataset.
    ds = Dataset.from_pandas(training_df, preserve_index=False)
    train_test = ds.train_test_split(test_size=0.2, seed=42)

    model = SetFitModel.from_pretrained(base_model)

    args = TrainingArguments(
        batch_size=16,
        num_epochs=1,
        num_iterations=5,  # SetFit is few-shot; 5 iterations is enough
    )

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_test["train"],
        eval_dataset=train_test["test"],
    )

    log.info("Training SetFit classifier for '%s'...", dimension)
    trainer.train()

    metrics = trainer.evaluate()
    log.info("Evaluation metrics for '%s': %s", dimension, metrics)

    if output_dir:
        save_path = Path(output_dir) / f"setfit_{dimension}"
        model.save_pretrained(str(save_path))
        log.info("Model saved to %s", save_path)

    return model, metrics