Update app.py

app.py (CHANGED)
@@ -886,7 +886,7 @@ def detect_explicit_abuse(text):
         return True

     return False
-
+
 @spaces.GPU
 def analyze_single_message(text, thresholds):
     """Analyze a single message for abuse patterns with boundary assessment"""
@@ -898,52 +898,51 @@ def analyze_single_message(text, thresholds):
         logger.debug("Empty text, returning zeros")
         return 0.0, [], [], {"label": "none"}, 1, 0.0, None, {'assessment': 'neutral', 'confidence': 0.5}

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    # BOUNDARY HEALTH CHECK - Add this new section
+    logger.debug("\n🛡️ BOUNDARY HEALTH ANALYSIS")
+    logger.debug("-" * 40)
+    boundary_class, boundary_confidence = predict_boundary_health(text)  # Unpack the tuple
+    boundary_assessment = get_boundary_assessment(text, boundary_class, boundary_confidence)
+    logger.debug(f"Boundary Class: {boundary_class} ({['Respected', 'Violated', 'Dismissed', 'Manipulative'][boundary_class]})")
+    logger.debug(f"Boundary Confidence: {boundary_confidence:.3f}")
+    logger.debug(f"Boundary Assessment: {boundary_assessment['label']}")
+
+    # Get sentiment EARLY - BEFORE any early returns
+    sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
+    sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
+    with torch.no_grad():
+        sent_logits = sentiment_model(**sent_inputs).logits[0]
+        sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
+
+    # Add detailed logging
+    logger.debug("\n🎭 SENTIMENT ANALYSIS DETAILS")
+    logger.debug(f"Raw logits: {sent_logits}")
+    logger.debug(f"Probabilities: supportive={sent_probs[0]:.3f}, undermining={sent_probs[1]:.3f}")
+
+    # Make sure we're using the correct index mapping
+    sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
+    logger.debug(f"Selected sentiment: {sentiment}")
+
+    # UPDATE THE OVERRIDE CONDITION:
+    # Now we need to check if boundary_class == 0 (Respected) instead of checking a probability
+    if (boundary_class != 0 and  # Not "Respected"
+        boundary_confidence < 0.7 and
+        sentiment == "supportive" and
+        len(text.split()) > 50 and
+        any(phrase in text.lower() for phrase in [
+            "i need you to", "i want to understand", "this isn't about",
+            "about accuracy", "willing to do something different"
+        ])):
+
+        logger.debug("🔄 Boundary assessment override: Sophisticated healthy boundary detected")
+        boundary_assessment = {
+            'assessment': 'healthy',
+            'label': 'Healthy Boundary (Sophisticated)',
+            'confidence': 0.85,
+            'description': 'Complex but healthy boundary-setting communication',
+            'recommendations': ['Continue this thoughtful, direct approach']
+        }

-
     # EARLY SUPPORTIVE MESSAGE CHECK
     innocent_indicators = [
         'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
@@ -952,10 +951,11 @@ def analyze_single_message(text, thresholds):
     ]

     # Enhanced early return check - now includes boundary health
+    # NOTE: Changed healthy_prob to boundary_class == 0 (Respected)
     if (any(indicator in text.lower() for indicator in innocent_indicators) and
         len(text.split()) < 20 and
         not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
-
+        boundary_class == 0):  # Updated boundary health check

         # If sentiment is strongly supportive AND boundary health is good, return early
         if sent_probs[0] > 0.8:  # 80% supportive
@@ -1025,11 +1025,12 @@ def analyze_single_message(text, thresholds):
         abuse_score = max(abuse_score, 70.0)

     # Apply boundary health modifier to abuse score
-
-
+    # NOTE: Updated to use boundary_class instead of healthy_prob
+    if boundary_class == 0 and boundary_confidence > 0.8 and not explicit_abuse:
+        # Very healthy boundaries (Respected) - cap abuse score much lower
         abuse_score = min(abuse_score, 20.0)
         logger.debug(f"Capped abuse score to {abuse_score} due to very healthy boundaries")
-    elif
+    elif boundary_class == 0 and boundary_confidence > 0.6 and sentiment == "supportive":
         # Moderately healthy boundaries with supportive sentiment
         abuse_score = min(abuse_score, 35.0)
         logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")
@@ -1058,8 +1059,9 @@ def analyze_single_message(text, thresholds):
     log_emotional_tone_usage(tone_tag, threshold_labels)

     # Check for the specific combination (final safety check)
+    # NOTE: Updated to use boundary_class instead of healthy_prob
     highest_pattern = max(matched_scores, key=lambda x: x[1])[0] if matched_scores else None
-    if sentiment == "supportive" and tone_tag == "neutral" and highest_pattern == "obscure language" and
+    if sentiment == "supportive" and tone_tag == "neutral" and highest_pattern == "obscure language" and boundary_class == 0:
         logger.debug("Message classified as likely non-abusive (supportive, neutral, healthy boundaries). Returning low risk.")
         return 0.0, [], [], {"label": "supportive"}, 1, 0.0, "neutral", boundary_assessment

@@ -1075,6 +1077,7 @@ def analyze_single_message(text, thresholds):
         logger.error(f"Error in analyze_single_message: {e}")
         logger.error(f"Traceback: {traceback.format_exc()}")
         return 0.0, [], [], {"label": "error"}, 1, 0.0, None, {'assessment': 'error', 'confidence': 0.0}
+
 def generate_abuse_score_chart(dates, scores, patterns):
     """Generate a timeline chart of abuse scores"""
     try:
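
The new code calls predict_boundary_health() and get_boundary_assessment(), whose definitions are not part of this diff. Below is a minimal sketch of the interface the call sites imply: the boundary_tokenizer, boundary_model and device globals, and the class-to-assessment mapping, are illustrative assumptions rather than the Space's actual implementation.

import torch

BOUNDARY_LABELS = ['Respected', 'Violated', 'Dismissed', 'Manipulative']

def predict_boundary_health(text):
    """Return (class_index, confidence); class 0 means the boundary is Respected."""
    # boundary_tokenizer / boundary_model / device are assumed module-level globals,
    # analogous to the sentiment_tokenizer / sentiment_model used in the diff above.
    inputs = boundary_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    with torch.no_grad():
        logits = boundary_model(**inputs).logits[0]
    probs = torch.softmax(logits, dim=-1)
    boundary_class = int(torch.argmax(probs))
    return boundary_class, float(probs[boundary_class])

def get_boundary_assessment(text, boundary_class, confidence):
    """Map the raw (class, confidence) pair onto the assessment dict the new code reads."""
    # The keys mirror those used in the diff ('assessment', 'label', 'confidence',
    # 'description', 'recommendations'); the mapping itself is an assumption.
    healthy = boundary_class == 0
    return {
        'assessment': 'healthy' if healthy else 'concerning',
        'label': f"{BOUNDARY_LABELS[boundary_class]} Boundary",
        'confidence': confidence,
        'description': f"Boundary communication classified as {BOUNDARY_LABELS[boundary_class].lower()}",
        'recommendations': [] if healthy else ['Review the flagged boundary language'],
    }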
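
The sentiment block likewise relies on module-level sentiment_tokenizer, sentiment_model, device and SENTIMENT_LABELS, and on index 0 of the probabilities meaning "supportive" (the sent_probs[0] > 0.8 early return depends on that ordering). A sketch of what that setup plausibly looks like; the checkpoint path is a placeholder, not the model the Space actually loads.

import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Placeholder checkpoint; the real model used by the Space is not shown in this diff.
SENTIMENT_MODEL_PATH = "path/to/sentiment-checkpoint"

# Index 0 must stay "supportive": the early-return check reads sent_probs[0].
SENTIMENT_LABELS = ["supportive", "undermining"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
sentiment_tokenizer = AutoTokenizer.from_pretrained(SENTIMENT_MODEL_PATH)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_PATH).to(device)
sentiment_model.eval()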
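
For callers, the early returns fix analyze_single_message to an 8-item tuple ending in the tone tag and the boundary assessment dict. A usage sketch follows; the names given to the middle positions are guesses inferred from the literal values in the returns, not names taken from app.py.

# Hypothetical caller; positions 2-6 are named from the early-return literals
# (two lists, a labelled dict, an int, a float) and may not match app.py's names.
(abuse_score, matched_labels, matched_scores, sentiment_info,
 stage, secondary_score, tone_tag, boundary_assessment) = analyze_single_message(text, thresholds)

if boundary_assessment.get('assessment') == 'healthy' and abuse_score <= 35.0:
    print(f"Low risk ({abuse_score:.1f}) - {sentiment_info['label']}, tone={tone_tag}")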
|