# Provenance: uploaded by jimnoneill via "Upload folder using huggingface_hub"
# (commit 178b774, verified).
"""
DeepSeek-powered tweet labeler — generates training labels for custom
classification dimensions that off-the-shelf models don't cover well.

Target dimensions (not well-served by CardiffNLP):
1. Ragebait virulence (0-1) — is this tweet designed to provoke outrage?
2. Compulsive engagement bait (0-1) — rhetorical questions, polls, "who agrees?"
3. Tribal signaling (0-1) — in-group/out-group framing, us-vs-them
4. Performative outrage (0-1) — outrage as spectacle vs genuine anger
5. Epistemic manipulation (0-1) — misleading framing, cherry-picking, straw-manning
6. Agency language (0-1) — active, agentic framing vs passive/victimhood framing

Uses DeepSeek as the annotator; a sample of its labels is then human-validated,
and SetFit classifiers are trained to distill the labels into deterministic models.
"""
import json
import logging
import os
import time
from pathlib import Path
from typing import Optional

import pandas as pd
from tqdm import tqdm

from .config import DATA_DIR
log = logging.getLogger(__name__)

# SECURITY: the API key is read from the environment and must never be
# committed to source control. An empty string means "not configured".
# NOTE(review): the key that was previously hard-coded here has been published
# and should be revoked with the provider.
DEEPSEEK_API_KEY = os.environ.get("DEEPSEEK_API_KEY", "")
# DeepSeek is reached through the OpenAI-compatible client at this base URL.
DEEPSEEK_BASE_URL = "https://api.deepseek.com"
# Model name sent with every chat-completion request.
DEEPSEEK_MODEL = "deepseek-chat"
# The labeling prompt — structured to produce consistent JSON output.
#
# This is a `str.format` template: `DeepSeekLabeler.label_tweet` fills in the
# `{tweet_text}`, `{senator_name}` and `{party}` placeholders. Any other literal
# brace added to this text would have to be doubled (`{{` / `}}`), otherwise
# `.format()` will raise.
#
# The six dimension names listed below are the JSON keys that `label_tweet`
# reads back from the model's reply, so the two must be kept in sync.
LABELING_PROMPT = """You are an expert annotator for political social media analysis.
Rate this tweet from a US senator on the following dimensions.
Return ONLY a JSON object with scores from 0.0 to 1.0.
Dimensions:
- ragebait: Is this tweet designed to provoke outrage or anger in readers? (0=informational, 1=pure ragebait)
- engagement_bait: Does it use tricks to drive engagement — rhetorical questions, polls, "who agrees?", calls to action? (0=none, 1=heavy)
- tribal_signal: Does it use us-vs-them framing, in-group/out-group language, partisan tribalism? (0=none, 1=strong)
- performative_outrage: Is the outrage theatrical/performative rather than substantive? (0=genuine/none, 1=pure performance)
- epistemic_manipulation: Does it mislead through cherry-picking, straw-manning, false equivalence, or decontextualization? (0=fair, 1=heavily manipulative)
- agency_language: Does the author use active, agentic language (I did, we will) vs passive/victimhood framing (they did this to us)? (0=passive/victim, 1=agentic)
Tweet: "{tweet_text}"
Senator: {senator_name} ({party})
Return ONLY valid JSON, no explanation:"""
class DeepSeekLabeler:
    """
    Use DeepSeek API to generate soft labels for custom classification dimensions.

    These labels become training data for distilled SetFit classifiers.

    Class attributes:
        DIMENSIONS: JSON keys requested by ``LABELING_PROMPT``; also the label
            columns produced by :meth:`label_tweet` / :meth:`label_batch`.
        NEUTRAL_SCORE: Score used for a missing dimension or a failed call.
        MAX_TWEET_CHARS: Tweets are truncated to this many characters before
            being sent to the model and before being stored.
        CHECKPOINT_EVERY: Number of tweets between intermediate checkpoints.
    """

    DIMENSIONS = (
        "ragebait",
        "engagement_bait",
        "tribal_signal",
        "performative_outrage",
        "epistemic_manipulation",
        "agency_language",
    )
    NEUTRAL_SCORE = 0.5
    MAX_TWEET_CHARS = 500
    CHECKPOINT_EVERY = 100

    def __init__(self, api_key: str = DEEPSEEK_API_KEY):
        """
        Create an OpenAI-compatible client pointed at the DeepSeek endpoint.

        Args:
            api_key: DeepSeek API key. Defaults to the module-level
                ``DEEPSEEK_API_KEY``.

        Raises:
            ValueError: If ``api_key`` is empty. Without this check every
                request would fail and be silently recorded as neutral labels.
            ImportError: If the ``openai`` package is not installed.
        """
        if not api_key:
            raise ValueError(
                "A DeepSeek API key is required: pass api_key or configure "
                "DEEPSEEK_API_KEY"
            )
        try:
            from openai import OpenAI
        except ImportError as err:
            raise ImportError("Install openai: pip install openai") from err
        self.client = OpenAI(api_key=api_key, base_url=DEEPSEEK_BASE_URL)
        self.model = DEEPSEEK_MODEL

    @staticmethod
    def _extract_json(content: str) -> str:
        """Return the JSON object embedded in a model reply.

        Slicing from the first ``{`` to the last ``}`` handles bare JSON,
        Markdown code fences (with or without a ``json`` tag) and replies that
        wrap the object in explanatory prose. If no braces are found the
        stripped reply is returned unchanged so ``json.loads`` reports the error.
        """
        start = content.find("{")
        end = content.rfind("}")
        if start != -1 and end > start:
            return content[start:end + 1]
        return content.strip()

    @classmethod
    def _clamp_score(cls, value) -> float:
        """Convert ``value`` to a float clamped to [0, 1]; NaN becomes neutral."""
        score = float(value)
        if score != score:  # NaN is the only float that is not equal to itself
            return cls.NEUTRAL_SCORE
        return max(0.0, min(1.0, score))

    def label_tweet(self, text: str, senator_name: str = "", party: str = "") -> dict:
        """
        Label a single tweet. Returns dict of dimension scores.

        Args:
            text: Tweet text; truncated to ``MAX_TWEET_CHARS`` in the prompt.
            senator_name: Author name for the prompt ("Unknown" if empty).
            party: Author party for the prompt ("Unknown" if empty).

        Returns:
            A dict with one float in [0, 1] per entry of ``DIMENSIONS``.
            Dimensions missing from the reply get ``NEUTRAL_SCORE``. If the API
            call or the parsing fails, a warning is logged and every dimension
            is set to ``NEUTRAL_SCORE`` (best effort: one bad tweet must not
            abort a long batch).
        """
        prompt = LABELING_PROMPT.format(
            tweet_text=text[:self.MAX_TWEET_CHARS],  # Truncate very long tweets
            senator_name=senator_name or "Unknown",
            party=party or "Unknown",
        )
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=200,
                temperature=0.1,  # Low temp for consistency
            )
            raw = response.choices[0].message.content
            labels = json.loads(self._extract_json(raw))
            return {
                dim: self._clamp_score(labels.get(dim, self.NEUTRAL_SCORE))
                for dim in self.DIMENSIONS
            }
        except Exception as e:  # deliberate best-effort boundary, see docstring
            log.warning("DeepSeek labeling failed for tweet: %s — %s", text[:50], e)
            return dict.fromkeys(self.DIMENSIONS, self.NEUTRAL_SCORE)

    def label_batch(
        self,
        df: pd.DataFrame,
        text_col: str = "text",
        senator_name: str = "",
        party: str = "",
        max_tweets: Optional[int] = None,
        delay: float = 0.1,
        save_path: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Label a batch of tweets and return one row of labels per tweet.

        Args:
            df: DataFrame with tweets.
            text_col: Name of the column holding the tweet text.
            senator_name: Author name passed to every prompt.
            party: Author party passed to every prompt.
            max_tweets: Cap on number to label (for cost control). ``None``
                or ``0`` means no cap.
            delay: Seconds between API calls (rate limiting).
            save_path: Parquet file for intermediate and final results. If it
                already exists, its rows are treated as already labeled and
                labeling resumes after them (resume-friendly).

        Returns:
            DataFrame with the ``DIMENSIONS`` columns plus ``_index`` (position
            within the labeled subset), ``text`` (truncated tweet) and, when
            the input has one, ``tweet_id`` (as a string).
        """
        subset = df.head(max_tweets) if max_tweets else df
        n = len(subset)
        log.info("Labeling %d tweets via DeepSeek...", n)

        # Resume from checkpoint if exists
        results = []
        start_idx = 0
        if save_path:
            checkpoint = Path(save_path)
            # Create the directory up front: intermediate checkpoints are
            # written long before the final save.
            checkpoint.parent.mkdir(parents=True, exist_ok=True)
            if checkpoint.exists():
                existing = pd.read_parquet(save_path)
                start_idx = len(existing)
                results = existing.to_dict("records")
                log.info("Resuming from checkpoint: %d already labeled", start_idx)

        # Iterate only over rows that still need labels so the progress bar
        # (which starts at `start_idx`) finishes exactly at `n`.
        remaining = subset.iloc[start_idx:]
        progress = tqdm(
            remaining.iterrows(),
            total=n,
            desc="DeepSeek labeling",
            initial=min(start_idx, n),
        )
        for i, (_, row) in enumerate(progress, start=start_idx):
            text = str(row[text_col])
            labels = self.label_tweet(text, senator_name=senator_name, party=party)
            labels["_index"] = i
            labels["text"] = text[:self.MAX_TWEET_CHARS]
            if "tweet_id" in row:
                labels["tweet_id"] = str(row["tweet_id"])
            results.append(labels)

            # Checkpoint periodically so an interrupted run loses little work
            if save_path and (i + 1) % self.CHECKPOINT_EVERY == 0:
                pd.DataFrame(results).to_parquet(save_path, index=False)
                log.info("Checkpoint saved: %d/%d", i + 1, n)

            if delay > 0:
                time.sleep(delay)

        result_df = pd.DataFrame(results)
        if save_path:
            result_df.to_parquet(save_path, index=False)
            log.info("Labels saved to %s", save_path)
        log.info(
            "Labeling complete: %d tweets, %d dimensions",
            len(result_df), len(self.DIMENSIONS),
        )
        return result_df
def build_training_set(
    labeled_df: pd.DataFrame,
    dimension: str,
    threshold_positive: float = 0.7,
    threshold_negative: float = 0.3,
    max_per_class: int = 500,
) -> pd.DataFrame:
    """
    Convert soft DeepSeek labels into a binary training set for SetFit.

    Tweets scoring >= threshold_positive are positive examples (label 1).
    Tweets scoring <= threshold_negative are negative examples (label 0).
    Scores in between are excluded as ambiguous. A tweet is never placed in
    both classes: if the thresholds meet or overlap, the positive class wins.

    Args:
        labeled_df: Labeled tweets; must contain ``dimension`` and ``text``.
        dimension: Name of the score column to binarize.
        threshold_positive: Inclusive lower bound for positive examples.
        threshold_negative: Inclusive upper bound for negative examples.
        max_per_class: Keep at most this many rows per class, taking the
            first ones in ``labeled_df`` order.

    Returns:
        A shuffled (fixed seed) DataFrame with columns ``text`` and ``label``.

    Raises:
        ValueError: If ``dimension`` is not a column of ``labeled_df``.
    """
    if dimension not in labeled_df.columns:
        raise ValueError(f"Dimension '{dimension}' not in labeled data")

    scores = labeled_df[dimension]
    is_positive = scores >= threshold_positive
    # Excluding positives keeps the classes disjoint for any threshold pair.
    is_negative = (scores <= threshold_negative) & ~is_positive

    pos = labeled_df[is_positive].head(max_per_class).copy()
    neg = labeled_df[is_negative].head(max_per_class).copy()
    pos["label"] = 1
    neg["label"] = 0

    training = pd.concat([pos, neg], ignore_index=True).sample(frac=1, random_state=42)

    # Count only rows that matched neither class; rows dropped by the
    # `max_per_class` cap are not ambiguous and are not counted here.
    n_ambiguous = int((~(is_positive | is_negative)).sum())
    log.info(
        "Training set for '%s': %d positive, %d negative (excluded %d ambiguous)",
        dimension, len(pos), len(neg), n_ambiguous,
    )
    return training[["text", "label"]]
def train_setfit_classifier(
    training_df: pd.DataFrame,
    dimension: str,
    base_model: str = "sentence-transformers/all-mpnet-base-v2",
    output_dir: Optional[str] = None,
):
    """
    Train a SetFit classifier on DeepSeek-generated labels.

    SetFit needs very few examples (8+ per class) to match full fine-tuning.

    Args:
        training_df: Training examples (presumably the ``text`` / ``label``
            frame returned by ``build_training_set`` — verify against caller).
        dimension: Dimension name; used in log messages and in the name of
            the saved model directory.
        base_model: Sentence-transformer checkpoint to start from.
        output_dir: If given, the trained model is saved to
            ``<output_dir>/setfit_<dimension>``.

    Returns:
        A ``(model, metrics)`` tuple: the trained SetFit model and the result
        of ``trainer.evaluate()`` on the held-out 20% split.

    Raises:
        ValueError: If ``training_df`` has no rows.
    """
    if training_df.empty:
        raise ValueError(f"No training examples for dimension '{dimension}'")

    # Imported lazily so the rest of the module works without these packages.
    from datasets import Dataset
    from setfit import SetFitModel, Trainer, TrainingArguments

    # preserve_index=False: a shuffled frame has a non-default index, which
    # from_pandas would otherwise store as an extra `__index_level_0__` column.
    ds = Dataset.from_pandas(training_df, preserve_index=False)
    train_test = ds.train_test_split(test_size=0.2, seed=42)

    model = SetFitModel.from_pretrained(base_model)
    args = TrainingArguments(
        batch_size=16,
        num_epochs=1,
        num_iterations=5,  # SetFit is few-shot; 5 iterations is enough
    )
    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=train_test["train"],
        eval_dataset=train_test["test"],
    )

    log.info("Training SetFit classifier for '%s'...", dimension)
    trainer.train()
    metrics = trainer.evaluate()
    log.info("Evaluation metrics for '%s': %s", dimension, metrics)

    if output_dir:
        save_path = Path(output_dir) / f"setfit_{dimension}"
        model.save_pretrained(str(save_path))
        log.info("Model saved to %s", save_path)
    return model, metrics