""" DeepSeek-powered tweet labeler — generates training labels for custom classification dimensions that off-the-shelf models don't cover well. Target dimensions (not well-served by CardiffNLP): 1. Ragebait virulence (0-1) — is this tweet designed to provoke outrage? 2. Compulsive engagement bait (0-1) — rhetorical questions, polls, "who agrees?" 3. Tribal signaling (0-1) — in-group/out-group framing, us-vs-them 4. Performative outrage (0-1) — outrage as spectacle vs genuine anger 5. Epistemic manipulation (0-1) — misleading framing, cherry-picking, straw-manning Uses DeepSeek as annotator, then human-validates a sample, then trains SetFit classifiers to distill into deterministic models. """ import json import logging import time from pathlib import Path from typing import Optional import pandas as pd from tqdm import tqdm from .config import DATA_DIR log = logging.getLogger(__name__) DEEPSEEK_API_KEY = "sk-94fa064f77434e74916c3ee276d4ff08" DEEPSEEK_BASE_URL = "https://api.deepseek.com" DEEPSEEK_MODEL = "deepseek-chat" # The labeling prompt — structured to produce consistent JSON output LABELING_PROMPT = """You are an expert annotator for political social media analysis. Rate this tweet from a US senator on the following dimensions. Return ONLY a JSON object with scores from 0.0 to 1.0. Dimensions: - ragebait: Is this tweet designed to provoke outrage or anger in readers? (0=informational, 1=pure ragebait) - engagement_bait: Does it use tricks to drive engagement — rhetorical questions, polls, "who agrees?", calls to action? (0=none, 1=heavy) - tribal_signal: Does it use us-vs-them framing, in-group/out-group language, partisan tribalism? (0=none, 1=strong) - performative_outrage: Is the outrage theatrical/performative rather than substantive? (0=genuine/none, 1=pure performance) - epistemic_manipulation: Does it mislead through cherry-picking, straw-manning, false equivalence, or decontextualization? (0=fair, 1=heavily manipulative) - agency_language: Does the author use active, agentic language (I did, we will) vs passive/victimhood framing (they did this to us)? (0=passive/victim, 1=agentic) Tweet: "{tweet_text}" Senator: {senator_name} ({party}) Return ONLY valid JSON, no explanation:""" class DeepSeekLabeler: """ Use DeepSeek API to generate soft labels for custom classification dimensions. These labels become training data for distilled SetFit classifiers. """ def __init__(self, api_key: str = DEEPSEEK_API_KEY): try: from openai import OpenAI except ImportError: raise ImportError("Install openai: pip install openai") self.client = OpenAI(api_key=api_key, base_url=DEEPSEEK_BASE_URL) self.model = DEEPSEEK_MODEL def label_tweet(self, text: str, senator_name: str = "", party: str = "") -> dict: """Label a single tweet. Returns dict of dimension scores.""" prompt = LABELING_PROMPT.format( tweet_text=text[:500], # Truncate very long tweets senator_name=senator_name or "Unknown", party=party or "Unknown", ) try: response = self.client.chat.completions.create( model=self.model, messages=[{"role": "user", "content": prompt}], max_tokens=200, temperature=0.1, # Low temp for consistency ) content = response.choices[0].message.content.strip() # Parse JSON from response (handle markdown code blocks) if content.startswith("```"): content = content.split("```")[1] if content.startswith("json"): content = content[4:] content = content.strip() labels = json.loads(content) # Validate scores are in [0, 1] validated = {} for key in ["ragebait", "engagement_bait", "tribal_signal", "performative_outrage", "epistemic_manipulation", "agency_language"]: val = labels.get(key, 0.5) validated[key] = max(0.0, min(1.0, float(val))) return validated except Exception as e: log.warning("DeepSeek labeling failed for tweet: %s — %s", text[:50], e) return { "ragebait": 0.5, "engagement_bait": 0.5, "tribal_signal": 0.5, "performative_outrage": 0.5, "epistemic_manipulation": 0.5, "agency_language": 0.5, } def label_batch( self, df: pd.DataFrame, text_col: str = "text", senator_name: str = "", party: str = "", max_tweets: Optional[int] = None, delay: float = 0.1, save_path: Optional[str] = None, ) -> pd.DataFrame: """ Label a batch of tweets. Returns DataFrame with label columns added. Args: df: DataFrame with tweets max_tweets: Cap on number to label (for cost control) delay: Seconds between API calls (rate limiting) save_path: Save intermediate results (resume-friendly) """ subset = df.head(max_tweets) if max_tweets else df n = len(subset) log.info("Labeling %d tweets via DeepSeek...", n) # Resume from checkpoint if exists results = [] start_idx = 0 if save_path and Path(save_path).exists(): existing = pd.read_parquet(save_path) start_idx = len(existing) results = existing.to_dict("records") log.info("Resuming from checkpoint: %d already labeled", start_idx) for i, (_, row) in enumerate(tqdm( subset.iterrows(), total=n, desc="DeepSeek labeling", initial=start_idx )): if i < start_idx: continue text = str(row[text_col]) labels = self.label_tweet(text, senator_name=senator_name, party=party) labels["_index"] = i labels["text"] = text[:500] if "tweet_id" in row: labels["tweet_id"] = str(row["tweet_id"]) results.append(labels) # Checkpoint every 100 tweets if save_path and (i + 1) % 100 == 0: checkpoint_df = pd.DataFrame(results) checkpoint_df.to_parquet(save_path, index=False) log.info("Checkpoint saved: %d/%d", i + 1, n) if delay > 0: time.sleep(delay) result_df = pd.DataFrame(results) if save_path: Path(save_path).parent.mkdir(parents=True, exist_ok=True) result_df.to_parquet(save_path, index=False) log.info("Labels saved to %s", save_path) log.info("Labeling complete: %d tweets, %d dimensions", len(result_df), 6) return result_df def build_training_set( labeled_df: pd.DataFrame, dimension: str, threshold_positive: float = 0.7, threshold_negative: float = 0.3, max_per_class: int = 500, ) -> pd.DataFrame: """ Convert soft DeepSeek labels into a binary training set for SetFit. Tweets scoring > threshold_positive are positive examples. Tweets scoring < threshold_negative are negative examples. Middle scores are excluded (ambiguous). """ if dimension not in labeled_df.columns: raise ValueError(f"Dimension '{dimension}' not in labeled data") pos = labeled_df[labeled_df[dimension] >= threshold_positive].head(max_per_class) neg = labeled_df[labeled_df[dimension] <= threshold_negative].head(max_per_class) pos = pos.copy() neg = neg.copy() pos["label"] = 1 neg["label"] = 0 training = pd.concat([pos, neg], ignore_index=True).sample(frac=1, random_state=42) log.info( "Training set for '%s': %d positive, %d negative (excluded %d ambiguous)", dimension, len(pos), len(neg), len(labeled_df) - len(pos) - len(neg), ) return training[["text", "label"]] def train_setfit_classifier( training_df: pd.DataFrame, dimension: str, base_model: str = "sentence-transformers/all-mpnet-base-v2", output_dir: Optional[str] = None, ): """ Train a SetFit classifier on DeepSeek-generated labels. SetFit needs very few examples (8+ per class) to match full fine-tuning. """ from datasets import Dataset from setfit import SetFitModel, Trainer, TrainingArguments ds = Dataset.from_pandas(training_df) train_test = ds.train_test_split(test_size=0.2, seed=42) model = SetFitModel.from_pretrained(base_model) args = TrainingArguments( batch_size=16, num_epochs=1, num_iterations=5, # SetFit is few-shot; 5 iterations is enough ) trainer = Trainer( model=model, args=args, train_dataset=train_test["train"], eval_dataset=train_test["test"], ) log.info("Training SetFit classifier for '%s'...", dimension) trainer.train() metrics = trainer.evaluate() log.info("Evaluation metrics for '%s': %s", dimension, metrics) if output_dir: save_path = f"{output_dir}/setfit_{dimension}" model.save_pretrained(save_path) log.info("Model saved to %s", save_path) return model, metrics