elly99 committed on
Commit
5d57d47
·
verified ·
1 Parent(s): 2e71a66

Update src/ethics/ethics.py

Browse files
Files changed (1) hide show
  1. src/ethics/ethics.py +77 -20
src/ethics/ethics.py CHANGED
@@ -16,33 +16,90 @@ def check_agent_autonomy(question: str, authorization_level: int):
16
  # Used to monitor whether the system is acting too independently or out of context
17
def assess_ethical_risk(content, domain="scientific"):
    """
    Screen an AI-generated response for implicit ethical risks.

    Scans the text for a fixed set of markers — sensitive topics, biased
    phrasing, and unsourced appeals to authority — and reports the findings
    as a flat dict of flags plus an optional revision suggestion.
    """
    lowered = content.lower()

    report = {
        "potential_manipulation": False,
        "misinformation_risk": False,
        "linguistic_bias": False,
        "critical_topic": False,
        "neutral_language": True,
        "environmental_risk": "Moderate",
        "revision_suggestion": None,
    }

    # Sensitive subject areas flip the critical-topic flag.
    if any(topic in lowered for topic in ("vaccine", "gender", "politics")):
        report["critical_topic"] = True

    # Sweeping gender generalizations count as linguistic bias.
    if any(phrase in lowered for phrase in ("all men", "women are")):
        report["linguistic_bias"] = True
        report["neutral_language"] = False
        report["revision_suggestion"] = "Rephrase with attention to inclusive language."

    # Unattributed appeals to authority suggest misinformation; a later
    # match deliberately overwrites any earlier revision suggestion.
    if "according to experts without citing sources" in lowered:
        report["misinformation_risk"] = True
        report["revision_suggestion"] = "Add reliable sources or remove absolute claims."

    return report
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
# Example prompt: a medicine-domain question used to exercise the ethics checks.
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."
 
16
  # Used to monitor whether the system is acting too independently or out of context
17
def assess_ethical_risk(content, domain="scientific"):
    """
    Evaluate ethical risks in AI-generated content with a scalar score.

    Combines pattern-based heuristics (critical topics, biased, normative or
    stereotyping phrasing, unsourced claims) with an LLM-produced bias score,
    classifies the total, and asks the LLM for a rewrite when the overall
    risk crosses the revision threshold.

    Args:
        content: Text to analyze.
        domain: Nominal content domain (currently unused by the checks).

    Returns:
        dict with keys:
            overall_risk (float): total risk, capped at 1.0, rounded to 2 dp.
            risk_level (str): "Basso" / "Medio" / "Alto" (Italian labels).
            risk_sources (list[str]): human-readable triggers that fired.
            revision_suggestion (str | None): set when risk > 0.6.
            revised_response (str | None): LLM rewrite when risk > 0.6.
    """
    text_lower = content.lower()
    risk_score = 0.0
    risk_sources = []

    # Pattern-based scoring: each detected category adds a fixed weight.
    critical_terms = ["vaccine", "gender", "politics", "religion", "ethnicity"]
    if any(term in text_lower for term in critical_terms):
        risk_score += 0.8
        risk_sources.append("Critical topic")

    if "all men" in text_lower or "women are" in text_lower:
        risk_score += 0.5
        risk_sources.append("Linguistic bias")

    if "according to experts without citing sources" in text_lower:
        risk_score += 0.5
        risk_sources.append("Misinformation")

    cultural_patterns = [
        "asians are", "africans are", "europeans are", "americans are",
        "italians are", "japanese are", "muslims are", "jews are"
    ]
    if any(p in text_lower for p in cultural_patterns):
        risk_score += 0.5
        risk_sources.append("Cultural generalization")

    stereotype_patterns = [
        "psychologists are", "engineers are", "artists are", "scientists are",
        "philosophers are", "mathematicians are"
    ]
    if any(p in text_lower for p in stereotype_patterns):
        risk_score += 0.5
        risk_sources.append("Disciplinary stereotype")

    normative_patterns = [
        "should behave", "must act", "it's right that", "it's wrong to",
        "they ought to", "is unacceptable", "is appropriate"
    ]
    if any(p in text_lower for p in normative_patterns):
        risk_score += 0.5
        risk_sources.append("Normative implication")

    # LLM-based scoring: ask the model to rate bias on a 0-1 scale.
    bias_prompt = f"Rate the cultural or ethical bias of the following text on a scale from 0 to 1:\n\n{content}\n\nScore:"
    llm_score_raw = llm.invoke(bias_prompt).content.strip()

    try:
        llm_score = float(llm_score_raw)
    except ValueError:
        # Unparseable model output contributes nothing to the score.
        llm_score = 0.0
    else:
        # Clamp to the 0-1 scale the prompt requested; the model may overshoot.
        llm_score = max(0.0, min(llm_score, 1.0))
        risk_score += llm_score
        risk_sources.append(f"LLM bias score: {llm_score}")

    # Normalize and classify (labels are Italian: low / medium / high).
    overall_risk = min(risk_score, 1.0)

    if overall_risk <= 0.3:
        risk_level = "Basso"
    elif overall_risk <= 0.7:
        risk_level = "Medio"
    else:
        risk_level = "Alto"

    # Automatic rewrite when risk is high.
    # NOTE(review): the revision threshold (0.6) sits below the "Alto"
    # boundary (0.7), so upper-"Medio" content is also revised — confirm
    # this is intended.
    revised_response = None
    revision_suggestion = None
    if overall_risk > 0.6:
        revision_suggestion = "Ethical risk detected. Rewrite to reduce bias and improve neutrality."
        revision_prompt = f"""Rewrite this to reduce ethical bias and improve neutrality:
Original: {content}
Rewritten:"""
        revised_response = llm.invoke(revision_prompt).content.strip()

    return {
        "overall_risk": round(overall_risk, 2),
        "risk_level": risk_level,
        "risk_sources": risk_sources,
        "revision_suggestion": revision_suggestion,
        "revised_response": revised_response
    }
103
 
104
# Example prompt: a medicine-domain question used to exercise the ethics checks.
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."