elly99 committed on
Commit
5d57d47
·
verified ·
1 Parent(s): 2e71a66

Update src/ethics/ethics.py

Browse files
Files changed (1) hide show
  1. src/ethics/ethics.py +77 -20
src/ethics/ethics.py CHANGED
@@ -16,33 +16,90 @@ def check_agent_autonomy(question: str, authorization_level: int):
16
  # Used to monitor whether the system is acting too independently or out of context
17
def assess_ethical_risk(content, domain="scientific"):
    """
    Screen an AI-generated response for implicit ethical risks.

    Scans the text for a fixed set of markers — sensitive topics, biased
    phrasing, and unsourced appeals to authority — and reports the findings
    as a flat dict of flags plus an optional revision suggestion.
    """
    lowered = content.lower()

    report = {
        "potential_manipulation": False,
        "misinformation_risk": False,
        "linguistic_bias": False,
        "critical_topic": False,
        "neutral_language": True,
        "environmental_risk": "Moderate",
        "revision_suggestion": None,
    }

    # Sensitive subject areas flip the critical-topic flag.
    if any(topic in lowered for topic in ("vaccine", "gender", "politics")):
        report["critical_topic"] = True

    # Sweeping gender generalizations count as linguistic bias.
    if any(phrase in lowered for phrase in ("all men", "women are")):
        report["linguistic_bias"] = True
        report["neutral_language"] = False
        report["revision_suggestion"] = "Rephrase with attention to inclusive language."

    # Unattributed appeals to authority suggest misinformation; a later
    # match deliberately overwrites any earlier revision suggestion.
    if "according to experts without citing sources" in lowered:
        report["misinformation_risk"] = True
        report["revision_suggestion"] = "Add reliable sources or remove absolute claims."

    return report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
# Example prompt: a medicine-domain question used to exercise the ethics checks.
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."
 
16
  # Used to monitor whether the system is acting too independently or out of context
17
def assess_ethical_risk(content, domain="scientific"):
    """
    Evaluate ethical risks in AI-generated content with a scalar score.

    Combines pattern-based heuristics (critical topics, biased, normative or
    stereotyping phrasing, unsourced claims) with an LLM-produced bias score,
    classifies the total, and asks the LLM for a rewrite when the overall
    risk crosses the revision threshold.

    Args:
        content: Text to analyze.
        domain: Nominal content domain (currently unused by the checks).

    Returns:
        dict with keys:
            overall_risk (float): total risk, capped at 1.0, rounded to 2 dp.
            risk_level (str): "Basso" / "Medio" / "Alto" (Italian labels).
            risk_sources (list[str]): human-readable triggers that fired.
            revision_suggestion (str | None): set when risk > 0.6.
            revised_response (str | None): LLM rewrite when risk > 0.6.
    """
    text_lower = content.lower()
    risk_score = 0.0
    risk_sources = []

    # Pattern-based scoring: each detected category adds a fixed weight.
    critical_terms = ["vaccine", "gender", "politics", "religion", "ethnicity"]
    if any(term in text_lower for term in critical_terms):
        risk_score += 0.8
        risk_sources.append("Critical topic")

    if "all men" in text_lower or "women are" in text_lower:
        risk_score += 0.5
        risk_sources.append("Linguistic bias")

    if "according to experts without citing sources" in text_lower:
        risk_score += 0.5
        risk_sources.append("Misinformation")

    cultural_patterns = [
        "asians are", "africans are", "europeans are", "americans are",
        "italians are", "japanese are", "muslims are", "jews are"
    ]
    if any(p in text_lower for p in cultural_patterns):
        risk_score += 0.5
        risk_sources.append("Cultural generalization")

    stereotype_patterns = [
        "psychologists are", "engineers are", "artists are", "scientists are",
        "philosophers are", "mathematicians are"
    ]
    if any(p in text_lower for p in stereotype_patterns):
        risk_score += 0.5
        risk_sources.append("Disciplinary stereotype")

    normative_patterns = [
        "should behave", "must act", "it's right that", "it's wrong to",
        "they ought to", "is unacceptable", "is appropriate"
    ]
    if any(p in text_lower for p in normative_patterns):
        risk_score += 0.5
        risk_sources.append("Normative implication")

    # LLM-based scoring: ask the model to rate bias on a 0-1 scale.
    bias_prompt = f"Rate the cultural or ethical bias of the following text on a scale from 0 to 1:\n\n{content}\n\nScore:"
    llm_score_raw = llm.invoke(bias_prompt).content.strip()

    try:
        llm_score = float(llm_score_raw)
    except ValueError:
        # Unparseable model output contributes nothing to the score.
        llm_score = 0.0
    else:
        # Clamp to the 0-1 scale the prompt requested; the model may overshoot.
        llm_score = max(0.0, min(llm_score, 1.0))
        risk_score += llm_score
        risk_sources.append(f"LLM bias score: {llm_score}")

    # Normalize and classify (labels are Italian: low / medium / high).
    overall_risk = min(risk_score, 1.0)

    if overall_risk <= 0.3:
        risk_level = "Basso"
    elif overall_risk <= 0.7:
        risk_level = "Medio"
    else:
        risk_level = "Alto"

    # Automatic rewrite when risk is high.
    # NOTE(review): the revision threshold (0.6) sits below the "Alto"
    # boundary (0.7), so upper-"Medio" content is also revised — confirm
    # this is intended.
    revised_response = None
    revision_suggestion = None
    if overall_risk > 0.6:
        revision_suggestion = "Ethical risk detected. Rewrite to reduce bias and improve neutrality."
        revision_prompt = f"""Rewrite this to reduce ethical bias and improve neutrality:
Original: {content}
Rewritten:"""
        revised_response = llm.invoke(revision_prompt).content.strip()

    return {
        "overall_risk": round(overall_risk, 2),
        "risk_level": risk_level,
        "risk_sources": risk_sources,
        "revision_suggestion": revision_suggestion,
        "revised_response": revised_response
    }
103
 
104
# Example prompt: a medicine-domain question used to exercise the ethics checks.
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."