Spaces:

SamanthaStorm
/

TetherSST

Runtime error

App Files Files Community

TetherSST / app.py

SamanthaStorm

Update app.py

1214a2c verified 9 months ago

raw

history blame contribute delete

11.5 kB

	import io
	import string

	import easyocr
	import gradio as gr
	import numpy as np
	import pandas as pd
	import torch
	from PIL import Image
	from transformers import pipeline as hf_pipeline, AutoModelForSequenceClassification, AutoTokenizer

	# ——— Load and preprocess NRC EmoLex ——————————————————————————————————
	# The lexicon file is read as tab-separated rows of (word, emotion, flag);
	# lines starting with '#' are skipped.
	EMOLEX_PATH = "NRC-Emotion-Lexicon-Wordlevel-v0.92.txt"
	emo_raw = pd.read_csv(
	    EMOLEX_PATH,
	    sep="\t",
	    names=["word", "emotion", "flag"],
	    comment="#",
	    header=None,
	    # Lexicon entries are literal words. With pandas' default NA handling a
	    # word spelled like an NA marker ("null", "nan", "NA", ...) would be read
	    # as a missing value and lost as a lookup key. Only an empty flag is
	    # still treated as missing (it becomes 0 through fillna below).
	    keep_default_na=False,
	    na_values={"flag": [""]},
	)
	# Reshape to one row per word and one integer column per emotion;
	# word/emotion pairs absent from the file count as 0.
	emo_df = (
	    emo_raw
	    .pivot(index="word", columns="emotion", values="flag")
	    .fillna(0)
	    .astype(int)
	)
	# word -> {emotion: flag}, used for per-token dictionary lookups.
	EMOLEX = emo_df.to_dict(orient="index")

	def score_emolex(text_lower):
	    """Sum NRC EmoLex emotion flags over the tokens of ``text_lower``.

	    Args:
	        text_lower: Message text, already lower-cased by the caller.

	    Returns:
	        dict with one entry per emotion column of ``emo_df``; each value is
	        the sum of that emotion's flags over all tokens found in ``EMOLEX``.
	    """
	    counts = {emo: 0 for emo in emo_df.columns}
	    for raw_tok in text_lower.split():
	        # Whitespace splitting leaves punctuation glued to words ("sorry,",
	        # "hate!"), which would never match a lexicon key. Try the token as
	        # written first, then with surrounding ASCII punctuation removed.
	        entry = EMOLEX.get(raw_tok) or EMOLEX.get(raw_tok.strip(string.punctuation))
	        if not entry:
	            continue
	        for emo, flag in entry.items():
	            counts[emo] += flag
	    return counts

	# ——— Load MPQA Subjectivity Lexicon —————————————————————————————————————————————
	# Every usable line is a run of whitespace-separated key=value clues. The
	# clues (minus word1) are stored in a list under the lower-cased word1 value,
	# so a word that appears on several lines keeps one dict per line.
	MPQA_PATH = "subjclueslen1-HLTEMNLP05.tff"
	mpqa_lex = {}
	with open(MPQA_PATH, encoding="utf-8") as f:
	    for line in map(str.strip, f):
	        # blank lines and '#' comment lines carry no clue
	        if line == "" or line[0] == "#":
	            continue

	        # tokens without '=' are dropped; a repeated key keeps its last value
	        fields = dict(
	            item.split("=", 1) for item in line.split() if "=" in item
	        )

	        # an entry is only usable when it names its word
	        w = fields.pop("word1", None)
	        if w is None:
	            continue
	        w = w.lower()
	        mpqa_lex.setdefault(w, []).append(fields)

	# ——— 1) Emotion Pipeline ————————————————————————————————————————————————
	# Shared text-classification pipeline used by get_emotion_profile.
	# top_k=None presumably requests a score for every label rather than only
	# the best one (get_emotion_profile iterates over all returned labels) —
	# confirm against the transformers pipeline docs.
	EMOTION_MODEL_NAME = "j-hartmann/emotion-english-distilroberta-base"
	emotion_pipeline = hf_pipeline(
	    task="text-classification",
	    model=EMOTION_MODEL_NAME,
	    top_k=None,
	    truncation=True,
	)
	def get_emotion_profile(text):
	    """Return ``{label: score}`` for ``text`` as reported by ``emotion_pipeline``.

	    Labels are lower-cased and scores rounded to three decimals. When the
	    pipeline wraps its answer in an outer list (a list of lists), the first
	    inner list is the one used.
	    """
	    raw = emotion_pipeline(text)
	    is_nested = isinstance(raw, list) and isinstance(raw[0], list)
	    predictions = raw[0] if is_nested else raw

	    profile = {}
	    for pred in predictions:
	        profile[pred["label"].lower()] = round(pred["score"], 3)
	    return profile

	# Substrings treated as apology language. analyze_message uses them to add
	# the "recovery phase" pattern, and get_emotional_tone_tag uses them in its
	# "pleading concern" rule. Matching is by substring on the lower-cased text.
	APOLOGY_KEYWORDS = ["sorry", "apology", "forgive"]

	# ——— 2) Abuse-Patterns Model ——————————————————————————————————————————————
	# Sequence-classification model and its tokenizer, both loaded once at import
	# time from the same model id (tokenizer loaded with use_fast=False).
	model_name = "SamanthaStorm/tether-multilabel-v3"
	model = AutoModelForSequenceClassification.from_pretrained(model_name)
	tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)

	# Pattern names, paired positionally with the model's per-label scores via
	# zip() in analyze_message. The order therefore has to match the order of the
	# model's outputs — presumably the training label order; verify before
	# reordering or inserting entries.
	LABELS = [
	"blame shifting", "contradictory statements", "control", "dismissiveness",
	"gaslighting", "guilt tripping", "insults", "obscure language",
	"projection", "recovery phase", "threat"
	]
	# Per-label cutoff: analyze_message marks a pattern active when its sigmoid
	# score is >= the value listed here. Every name in LABELS needs an entry.
	THRESHOLDS = {
	"blame shifting": 0.28,
	"contradictory statements": 0.27,
	"control": 0.08,
	"dismissiveness": 0.32,
	"gaslighting": 0.27,
	"guilt tripping": 0.31,
	"insults": 0.10,
	"obscure language": 0.55,
	"projection": 0.09,
	"recovery phase": 0.33,
	"threat": 0.15
	}

	# ——— 3) Initialize EasyOCR reader ————————————————————————————————————————————
	# One shared reader, built at import time for English with gpu=False;
	# analyze_composite uses it to pull text out of uploaded images.
	OCR_LANGUAGES = ["en"]
	ocr_reader = easyocr.Reader(OCR_LANGUAGES, gpu=False)

	# ——— 4) Emotional-Tone Tagging —————————————————————————————————————————————
	def get_emotional_tone_tag(emotion_profile, patterns, text_lower):
	    """Return the first tone tag whose rule matches the message, or ``None``.

	    Rules are checked top to bottom and the first hit wins, so their order is
	    part of the behaviour. Each rule combines model emotion scores, NRC EmoLex
	    word hits and the active abuse patterns.

	    Args:
	        emotion_profile: dict of emotion label -> score; a missing label
	            counts as 0.
	        patterns: collection of active pattern labels (may be empty).
	        text_lower: the message text, lower-cased.

	    Returns:
	        The tone tag string, or ``None`` when no rule applies.
	    """
	    sadness = emotion_profile.get("sadness", 0)
	    joy = emotion_profile.get("joy", 0)
	    neutral = emotion_profile.get("neutral", 0)
	    disgust = emotion_profile.get("disgust", 0)
	    anger = emotion_profile.get("anger", 0)
	    fear = emotion_profile.get("fear", 0)

	    # NRC-EmoLex counts. Whitespace splitting leaves punctuation attached
	    # ("sorry,", "hate!"), so a token that is not a lexicon key as written is
	    # retried with surrounding ASCII punctuation removed.
	    words = [
	        w if w in EMOLEX else w.strip(string.punctuation)
	        for w in text_lower.split()
	    ]
	    lex_counts = {
	        emo: sum(EMOLEX.get(w, {}).get(emo, 0) for w in words)
	        for emo in ("anger", "joy", "sadness", "fear", "disgust")
	    }

	    # NOTE: an MPQA subjectivity tally used to be built here. No rule ever
	    # read it, and it indexed a four-key dict (strongsubj / weaksubj /
	    # positive / negative) with the raw "type" and "priorpolarity" values of
	    # each lexicon entry, so any other value (the MPQA lexicon reportedly
	    # also uses e.g. "neutral" and "both") or a missing field raised
	    # KeyError and aborted the analysis. It is left out until a rule needs it.

	    def has_any(*labels):
	        # True when at least one of the given pattern labels is active.
	        return any(label in patterns for label in labels)

	    # 0. Support override
	    if lex_counts["joy"] > 0 and any(k in text_lower for k in ["support", "hope", "grace"]):
	        return "supportive"

	    # 1. Performative Regret
	    if sadness > 0.4 and has_any("blame shifting", "guilt tripping", "recovery phase"):
	        return "performative regret"

	    # 2. Coercive Warmth
	    if (
	        (joy > 0.3 or sadness > 0.4)
	        and (lex_counts["joy"] > 0 or lex_counts["sadness"] > 0)
	        and has_any("control", "gaslighting")
	    ):
	        return "coercive warmth"

	    # 3. Cold Invalidation
	    if (
	        (neutral + disgust) > 0.5
	        and lex_counts["disgust"] > 0
	        and has_any("dismissiveness", "projection", "obscure language")
	    ):
	        return "cold invalidation"

	    # 4. Genuine Vulnerability
	    # all() is also true for an empty pattern list, so "no patterns" qualifies.
	    if (
	        (sadness + fear) > 0.5
	        and lex_counts["sadness"] > 0
	        and lex_counts["fear"] > 0
	        and all(p == "recovery phase" for p in patterns)
	    ):
	        return "genuine vulnerability"

	    # 5. Emotional Threat
	    if (
	        (anger + disgust) > 0.5
	        and (lex_counts["anger"] > 0 or lex_counts["disgust"] > 0)
	        and has_any("control", "threat", "insults", "dismissiveness")
	    ):
	        return "emotional threat"

	    # 6. Weaponized Sadness
	    if (
	        sadness > 0.6
	        and lex_counts["sadness"] > 0
	        and has_any("guilt tripping", "projection")
	    ):
	        return "weaponized sadness"

	    # 7. Toxic Resignation
	    if (
	        neutral > 0.5
	        and has_any("dismissiveness", "obscure language")
	        and lex_counts["disgust"] == 0
	    ):
	        return "toxic resignation"

	    # 8. Indignant Reproach
	    if (
	        anger > 0.5
	        and lex_counts["anger"] > 0
	        and has_any("guilt tripping", "contradictory statements")
	    ):
	        return "indignant reproach"

	    # 9. Confrontational
	    if anger > 0.6 and lex_counts["anger"] > 0 and patterns:
	        return "confrontational"

	    # 10. Passive Aggression
	    # NOTE(review): every input that satisfies this rule already satisfies
	    # rule 3 (neutral > 0.6 implies neutral + disgust > 0.5, same lexicon and
	    # pattern conditions), so this branch cannot currently be reached. Kept
	    # in place so the rule order stays as authored.
	    if (
	        neutral > 0.6
	        and lex_counts["disgust"] > 0
	        and has_any("dismissiveness", "projection")
	    ):
	        return "passive aggression"

	    # 11. Sarcastic Mockery
	    if joy > 0.3 and lex_counts["joy"] > 0 and "insults" in patterns:
	        return "sarcastic mockery"

	    # 12. Menacing Threat
	    if fear > 0.3 and lex_counts["fear"] > 0 and "threat" in patterns:
	        return "menacing threat"

	    # 13. Pleading Concern
	    if (
	        sadness > 0.3
	        and lex_counts["sadness"] > 0
	        and any(k in text_lower for k in APOLOGY_KEYWORDS)
	        and not patterns
	    ):
	        return "pleading concern"

	    # 14. Fear-mongering
	    if (fear + disgust) > 0.5 and lex_counts["fear"] > 0 and "projection" in patterns:
	        return "fear-mongering"

	    # 16. Empathetic Solidarity
	    if (
	        joy > 0.2
	        and sadness > 0.2
	        and lex_counts["joy"] > 0
	        and lex_counts["sadness"] > 0
	        and not patterns
	    ):
	        return "empathetic solidarity"

	    # 17. Assertive Boundary
	    if anger > 0.4 and lex_counts["anger"] > 0 and "control" in patterns:
	        return "assertive boundary"

	    # 18. Stonewalling
	    if neutral > 0.7 and lex_counts["disgust"] == 0 and not patterns:
	        return "stonewalling"

	    return None

	# ——— 5) Single-message analysis ———————————————————————————————————————————
	def analyze_message(text):
	    """Profile one message: emotions, active abuse patterns and tone tag.

	    Returns:
	        dict with the keys "emotion_profile", "active_patterns" and
	        "tone_tag" (the tag may be ``None``).
	    """
	    lowered = text.lower()
	    profile = get_emotion_profile(text)

	    # Blend in NRC-EmoLex: lexicon counts are scaled by the largest count
	    # (by 1.0 when every count is zero) and each emotion keeps the higher of
	    # its model score and its scaled lexicon score.
	    raw_counts = score_emolex(lowered)
	    scale = max(raw_counts.values()) or 1.0
	    for label, model_score in profile.items():
	        lexicon_score = raw_counts[label] / scale if label in raw_counts else 0
	        profile[label] = max(model_score, lexicon_score)

	    # Abuse patterns: one sigmoid score per label, kept when it reaches the
	    # label's own threshold.
	    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
	    with torch.no_grad():
	        logits = model(**encoded).logits.squeeze(0)
	    probabilities = torch.sigmoid(logits).cpu().numpy()

	    active = []
	    for label, prob in zip(LABELS, probabilities):
	        if prob >= THRESHOLDS[label]:
	            active.append(label)

	    # Apology wording always counts as "recovery phase", whatever the model said.
	    if "recovery phase" not in active:
	        if any(keyword in lowered for keyword in APOLOGY_KEYWORDS):
	            active.append("recovery phase")

	    return {
	        "emotion_profile": profile,
	        "active_patterns": active,
	        "tone_tag": get_emotional_tone_tag(profile, active, lowered),
	    }

	# ——— 6) Composite wrapper ———————————————————————————————————————————————
	# File-name endings that are sent through OCR instead of being decoded as text.
	_IMAGE_SUFFIXES = (".png", ".jpg", ".jpeg", ".bmp", ".gif", ".tiff")


	def _read_upload(uploaded_file):
	    """Return ``(raw_bytes, lowercase_name)`` for a path or file-like upload."""
	    if hasattr(uploaded_file, "read"):
	        try:
	            raw = uploaded_file.read()
	        except (OSError, ValueError):
	            # The handle is closed or unreadable: re-open the file it names.
	            with open(uploaded_file.name, "rb") as fh:
	                raw = fh.read()
	    else:
	        # No read(): the upload is a filesystem path.
	        with open(uploaded_file, "rb") as fh:
	            raw = fh.read()
	    name = str(getattr(uploaded_file, "name", uploaded_file)).lower()
	    return raw, name


	def _upload_to_text(raw, name):
	    """Turn upload bytes into text: OCR for image names, decoding otherwise."""
	    if name.endswith(_IMAGE_SUFFIXES):
	        img = Image.open(io.BytesIO(raw))
	        arr = np.array(img.convert("RGB"))
	        return "\n".join(ocr_reader.readtext(arr, detail=0))
	    try:
	        return raw.decode("utf-8")
	    except UnicodeDecodeError:
	        # latin-1 maps every byte, so this fallback cannot fail.
	        return raw.decode("latin-1")


	def _format_report(title, result):
	    """Render one analyze_message result as a titled text section."""
	    return (
	        f"── {title} ──\n"
	        f"Emotion Profile : {result['emotion_profile']}\n"
	        f"Active Patterns : {result['active_patterns']}\n"
	        f"Emotional Tone : {result['tone_tag']}\n"
	    )


	def analyze_composite(uploaded_file, *texts):
	    """Analyze an optional upload plus any number of typed messages.

	    Args:
	        uploaded_file: ``None``, a filesystem path, or a file-like object
	            with ``read()`` and ``name``. Image names are OCR'd; anything
	            else is decoded as UTF-8 with a Latin-1 fallback.
	        *texts: message strings; ``None``, empty and whitespace-only
	            entries are skipped. Message numbers follow the argument
	            position (starting at 1), so skipped entries leave gaps.

	    Returns:
	        One report section per analyzed input, joined by newlines, or a
	        prompt asking for input when there was nothing to analyze.
	    """
	    outputs = []

	    # file handling / OCR
	    if uploaded_file is not None:
	        raw, name = _read_upload(uploaded_file)
	        content = _upload_to_text(raw, name)
	        outputs.append(_format_report("Uploaded File", analyze_message(content)))

	    # free-text messages
	    for idx, txt in enumerate(texts, start=1):
	        if not txt or not txt.strip():
	            continue
	        outputs.append(_format_report(f"Message {idx}", analyze_message(txt)))

	    if not outputs:
	        return "Please enter at least one message."
	    return "\n".join(outputs)

	# ——— 7) Gradio interface ———————————————————————————————————————————————
	message_inputs = [gr.Textbox(label="Message")]

	# The upload widget is listed first so its value arrives as
	# analyze_composite's first argument; the textboxes after it fill *texts.
	upload_input = gr.File(
	    label="Upload text or image",
	    file_types=[".txt", ".png", ".jpg", ".jpeg"],
	)

	iface = gr.Interface(
	    fn=analyze_composite,
	    inputs=[upload_input, *message_inputs],
	    outputs=gr.Textbox(label="Analysis"),
	    title="Tether Analyzer (extended tone tags)",
	    description="Emotion profiling, pattern tags, and a wide set of nuanced tone categories—no abuse score or DARVO.",
	)

	# Start the web app only when this file is run as a script.
	if __name__ == "__main__":
	    iface.launch()