SamanthaStorm committed on
Commit
88be3f1
·
verified ·
1 Parent(s): fbb63df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -51
app.py CHANGED
@@ -886,7 +886,7 @@ def detect_explicit_abuse(text):
886
  return True
887
 
888
  return False
889
- @spaces.GPU
890
  @spaces.GPU
891
  def analyze_single_message(text, thresholds):
892
  """Analyze a single message for abuse patterns with boundary assessment"""
@@ -898,52 +898,51 @@ def analyze_single_message(text, thresholds):
898
  logger.debug("Empty text, returning zeros")
899
  return 0.0, [], [], {"label": "none"}, 1, 0.0, None, {'assessment': 'neutral', 'confidence': 0.5}
900
 
901
- # BOUNDARY HEALTH CHECK - Add this new section
902
- logger.debug("\n🛡️ BOUNDARY HEALTH ANALYSIS")
903
- logger.debug("-" * 40)
904
- boundary_class, boundary_confidence = predict_boundary_health(text) # Unpack the tuple
905
- boundary_assessment = get_boundary_assessment(text, boundary_class, boundary_confidence)
906
- logger.debug(f"Boundary Class: {boundary_class} ({['Respected', 'Violated', 'Dismissed', 'Manipulative'][boundary_class]})")
907
- logger.debug(f"Boundary Confidence: {boundary_confidence:.3f}")
908
- logger.debug(f"Boundary Assessment: {boundary_assessment['label']}")
909
-
910
- # Get sentiment EARLY - BEFORE any early returns
911
- sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
912
- sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
913
- with torch.no_grad():
914
- sent_logits = sentiment_model(**sent_inputs).logits[0]
915
- sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
916
-
917
- # Add detailed logging
918
- logger.debug("\n🎭 SENTIMENT ANALYSIS DETAILS")
919
- logger.debug(f"Raw logits: {sent_logits}")
920
- logger.debug(f"Probabilities: supportive={sent_probs[0]:.3f}, undermining={sent_probs[1]:.3f}")
921
-
922
- # Make sure we're using the correct index mapping
923
- sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
924
- logger.debug(f"Selected sentiment: {sentiment}")
925
-
926
- # UPDATE THE OVERRIDE CONDITION:
927
- # Now we need to check if boundary_class == 0 (Respected) instead of checking a probability
928
- if (boundary_class != 0 and # Not "Respected"
929
- boundary_confidence < 0.7 and
930
- sentiment == "supportive" and
931
- len(text.split()) > 50 and
932
- any(phrase in text.lower() for phrase in [
933
- "i need you to", "i want to understand", "this isn't about",
934
- "about accuracy", "willing to do something different"
935
- ])):
936
-
937
- logger.debug("🔄 Boundary assessment override: Sophisticated healthy boundary detected")
938
- boundary_assessment = {
939
- 'assessment': 'healthy',
940
- 'label': 'Healthy Boundary (Sophisticated)',
941
- 'confidence': 0.85,
942
- 'description': 'Complex but healthy boundary-setting communication',
943
- 'recommendations': ['Continue this thoughtful, direct approach']
944
- }
945
 
946
-
947
  # EARLY SUPPORTIVE MESSAGE CHECK
948
  innocent_indicators = [
949
  'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
@@ -952,10 +951,11 @@ def analyze_single_message(text, thresholds):
952
  ]
953
 
954
  # Enhanced early return check - now includes boundary health
 
955
  if (any(indicator in text.lower() for indicator in innocent_indicators) and
956
  len(text.split()) < 20 and
957
  not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
958
- healthy_prob > 0.7): # Added boundary health check
959
 
960
  # If sentiment is strongly supportive AND boundary health is good, return early
961
  if sent_probs[0] > 0.8: # 80% supportive
@@ -1025,11 +1025,12 @@ def analyze_single_message(text, thresholds):
1025
  abuse_score = max(abuse_score, 70.0)
1026
 
1027
  # Apply boundary health modifier to abuse score
1028
- if healthy_prob > 0.8 and not explicit_abuse:
1029
- # Very healthy boundaries - cap abuse score much lower
 
1030
  abuse_score = min(abuse_score, 20.0)
1031
  logger.debug(f"Capped abuse score to {abuse_score} due to very healthy boundaries")
1032
- elif healthy_prob > 0.6 and sentiment == "supportive":
1033
  # Moderately healthy boundaries with supportive sentiment
1034
  abuse_score = min(abuse_score, 35.0)
1035
  logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")
@@ -1058,8 +1059,9 @@ def analyze_single_message(text, thresholds):
1058
  log_emotional_tone_usage(tone_tag, threshold_labels)
1059
 
1060
  # Check for the specific combination (final safety check)
 
1061
  highest_pattern = max(matched_scores, key=lambda x: x[1])[0] if matched_scores else None
1062
- if sentiment == "supportive" and tone_tag == "neutral" and highest_pattern == "obscure language" and healthy_prob > 0.6:
1063
  logger.debug("Message classified as likely non-abusive (supportive, neutral, healthy boundaries). Returning low risk.")
1064
  return 0.0, [], [], {"label": "supportive"}, 1, 0.0, "neutral", boundary_assessment
1065
 
@@ -1075,6 +1077,7 @@ def analyze_single_message(text, thresholds):
1075
  logger.error(f"Error in analyze_single_message: {e}")
1076
  logger.error(f"Traceback: {traceback.format_exc()}")
1077
  return 0.0, [], [], {"label": "error"}, 1, 0.0, None, {'assessment': 'error', 'confidence': 0.0}
 
1078
  def generate_abuse_score_chart(dates, scores, patterns):
1079
  """Generate a timeline chart of abuse scores"""
1080
  try:
 
886
  return True
887
 
888
  return False
889
+
890
  @spaces.GPU
891
  def analyze_single_message(text, thresholds):
892
  """Analyze a single message for abuse patterns with boundary assessment"""
 
898
  logger.debug("Empty text, returning zeros")
899
  return 0.0, [], [], {"label": "none"}, 1, 0.0, None, {'assessment': 'neutral', 'confidence': 0.5}
900
 
901
+ # BOUNDARY HEALTH CHECK - Add this new section
902
+ logger.debug("\n🛡️ BOUNDARY HEALTH ANALYSIS")
903
+ logger.debug("-" * 40)
904
+ boundary_class, boundary_confidence = predict_boundary_health(text) # Unpack the tuple
905
+ boundary_assessment = get_boundary_assessment(text, boundary_class, boundary_confidence)
906
+ logger.debug(f"Boundary Class: {boundary_class} ({['Respected', 'Violated', 'Dismissed', 'Manipulative'][boundary_class]})")
907
+ logger.debug(f"Boundary Confidence: {boundary_confidence:.3f}")
908
+ logger.debug(f"Boundary Assessment: {boundary_assessment['label']}")
909
+
910
+ # Get sentiment EARLY - BEFORE any early returns
911
+ sent_inputs = sentiment_tokenizer(text, return_tensors="pt", truncation=True, padding=True)
912
+ sent_inputs = {k: v.to(device) for k, v in sent_inputs.items()}
913
+ with torch.no_grad():
914
+ sent_logits = sentiment_model(**sent_inputs).logits[0]
915
+ sent_probs = torch.softmax(sent_logits, dim=-1).cpu().numpy()
916
+
917
+ # Add detailed logging
918
+ logger.debug("\n🎭 SENTIMENT ANALYSIS DETAILS")
919
+ logger.debug(f"Raw logits: {sent_logits}")
920
+ logger.debug(f"Probabilities: supportive={sent_probs[0]:.3f}, undermining={sent_probs[1]:.3f}")
921
+
922
+ # Make sure we're using the correct index mapping
923
+ sentiment = SENTIMENT_LABELS[int(np.argmax(sent_probs))]
924
+ logger.debug(f"Selected sentiment: {sentiment}")
925
+
926
+ # UPDATE THE OVERRIDE CONDITION:
927
+ # Now we need to check if boundary_class == 0 (Respected) instead of checking a probability
928
+ if (boundary_class != 0 and # Not "Respected"
929
+ boundary_confidence < 0.7 and
930
+ sentiment == "supportive" and
931
+ len(text.split()) > 50 and
932
+ any(phrase in text.lower() for phrase in [
933
+ "i need you to", "i want to understand", "this isn't about",
934
+ "about accuracy", "willing to do something different"
935
+ ])):
936
+
937
+ logger.debug("🔄 Boundary assessment override: Sophisticated healthy boundary detected")
938
+ boundary_assessment = {
939
+ 'assessment': 'healthy',
940
+ 'label': 'Healthy Boundary (Sophisticated)',
941
+ 'confidence': 0.85,
942
+ 'description': 'Complex but healthy boundary-setting communication',
943
+ 'recommendations': ['Continue this thoughtful, direct approach']
944
+ }
945
 
 
946
  # EARLY SUPPORTIVE MESSAGE CHECK
947
  innocent_indicators = [
948
  'broken', 'not working', 'cracked', 'glass', 'screen', 'phone',
 
951
  ]
952
 
953
  # Enhanced early return check - now includes boundary health
954
+ # NOTE: Changed healthy_prob to boundary_class == 0 (Respected)
955
  if (any(indicator in text.lower() for indicator in innocent_indicators) and
956
  len(text.split()) < 20 and
957
  not any(threat in text.lower() for threat in ['kill', 'hurt', 'destroy', 'hate']) and
958
+ boundary_class == 0): # Updated boundary health check
959
 
960
  # If sentiment is strongly supportive AND boundary health is good, return early
961
  if sent_probs[0] > 0.8: # 80% supportive
 
1025
  abuse_score = max(abuse_score, 70.0)
1026
 
1027
  # Apply boundary health modifier to abuse score
1028
+ # NOTE: Updated to use boundary_class instead of healthy_prob
1029
+ if boundary_class == 0 and boundary_confidence > 0.8 and not explicit_abuse:
1030
+ # Very healthy boundaries (Respected) - cap abuse score much lower
1031
  abuse_score = min(abuse_score, 20.0)
1032
  logger.debug(f"Capped abuse score to {abuse_score} due to very healthy boundaries")
1033
+ elif boundary_class == 0 and boundary_confidence > 0.6 and sentiment == "supportive":
1034
  # Moderately healthy boundaries with supportive sentiment
1035
  abuse_score = min(abuse_score, 35.0)
1036
  logger.debug(f"Capped abuse score to {abuse_score} due to healthy boundaries")
 
1059
  log_emotional_tone_usage(tone_tag, threshold_labels)
1060
 
1061
  # Check for the specific combination (final safety check)
1062
+ # NOTE: Updated to use boundary_class instead of healthy_prob
1063
  highest_pattern = max(matched_scores, key=lambda x: x[1])[0] if matched_scores else None
1064
+ if sentiment == "supportive" and tone_tag == "neutral" and highest_pattern == "obscure language" and boundary_class == 0:
1065
  logger.debug("Message classified as likely non-abusive (supportive, neutral, healthy boundaries). Returning low risk.")
1066
  return 0.0, [], [], {"label": "supportive"}, 1, 0.0, "neutral", boundary_assessment
1067
 
 
1077
  logger.error(f"Error in analyze_single_message: {e}")
1078
  logger.error(f"Traceback: {traceback.format_exc()}")
1079
  return 0.0, [], [], {"label": "error"}, 1, 0.0, None, {'assessment': 'error', 'confidence': 0.0}
1080
+
1081
  def generate_abuse_score_chart(dates, scores, patterns):
1082
  """Generate a timeline chart of abuse scores"""
1083
  try: