Transformers
Italian
English
semantic-search
explainable-ai
faiss
ai-ethics
responsible-ai
llm
prompt-engineering
multimodal-ai
ai-transparency
ethical-intelligence
explainable-llm
cognitive-ai
ethical-ai
scientific-retrieval
modular-ai
memory-augmented-llm
trustworthy-ai
reasoning-engine
ai-alignment
next-gen-llm
thinking-machines
open-source-ai
explainability
ai-research
semantic-audit
cognitive-agent
human-centered-ai
Update src/ethics/ethics.py
Browse files- src/ethics/ethics.py +77 -20
src/ethics/ethics.py
CHANGED
|
@@ -16,33 +16,90 @@ def check_agent_autonomy(question: str, authorization_level: int):
|
|
| 16 |
# Used to monitor whether the system is acting too independently or out of context
|
| 17 |
def assess_ethical_risk(content, domain="scientific"):
|
| 18 |
"""
|
| 19 |
-
Evaluates
|
| 20 |
-
|
|
|
|
| 21 |
"""
|
| 22 |
-
risk = {
|
| 23 |
-
"potential_manipulation": False,
|
| 24 |
-
"misinformation_risk": False,
|
| 25 |
-
"linguistic_bias": False,
|
| 26 |
-
"critical_topic": False,
|
| 27 |
-
"neutral_language": True,
|
| 28 |
-
"environmental_risk": "Moderate",
|
| 29 |
-
"revision_suggestion": None
|
| 30 |
-
}
|
| 31 |
-
|
| 32 |
text_lower = content.lower()
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
if "all men" in text_lower or "women are" in text_lower:
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
risk["revision_suggestion"] = "Rephrase with attention to inclusive language."
|
| 40 |
|
| 41 |
if "according to experts without citing sources" in text_lower:
|
| 42 |
-
|
| 43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
# Example prompt
|
| 48 |
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."
|
|
|
|
# Used to monitor whether the system is acting too independently or out of context
def _extract_score(raw):
    """Return the first float token found in *raw*, or None if none parses.

    LLMs frequently wrap a requested number in extra text (e.g. "Score: 0.8"),
    so calling float() on the whole reply would raise; scan token by token.
    """
    for token in raw.replace(":", " ").split():
        try:
            return float(token)
        except ValueError:
            continue
    return None


def assess_ethical_risk(content, domain="scientific"):
    """
    Evaluate ethical risk in AI-generated text via weighted pattern matching
    plus an LLM-assigned bias score, and trigger an automatic rewrite when
    the overall risk exceeds the threshold (0.6).

    Args:
        content: The text to evaluate.
        domain: Nominal content domain. Currently unused by the scoring
            logic; kept for interface compatibility with existing callers.

    Returns:
        dict with keys:
            overall_risk (float): total score clamped to [0, 1], rounded to 2 dp.
            risk_level (str): "Basso" / "Medio" / "Alto".
                NOTE(review): Italian labels in an otherwise-English module;
                kept as-is because callers may match on these exact strings.
            risk_sources (list[str]): human-readable reasons for the score.
            revision_suggestion (str | None): set when overall_risk > 0.6.
            revised_response (str | None): LLM rewrite when overall_risk > 0.6.

    Side effects: issues one or two calls to the module-level ``llm`` client
    (assumed to expose ``.invoke(prompt).content`` — defined elsewhere in
    this file; TODO confirm).
    """
    text_lower = content.lower()
    risk_score = 0.0
    risk_sources = []

    # Pattern-based scoring: each (patterns, weight, label) rule fires at
    # most once, regardless of how many of its patterns match.
    pattern_rules = [
        (["vaccine", "gender", "politics", "religion", "ethnicity"],
         0.8, "Critical topic"),
        (["all men", "women are"],
         0.5, "Linguistic bias"),
        (["according to experts without citing sources"],
         0.5, "Misinformation"),
        (["asians are", "africans are", "europeans are", "americans are",
          "italians are", "japanese are", "muslims are", "jews are"],
         0.5, "Cultural generalization"),
        (["psychologists are", "engineers are", "artists are", "scientists are",
          "philosophers are", "mathematicians are"],
         0.5, "Disciplinary stereotype"),
        (["should behave", "must act", "it's right that", "it's wrong to",
          "they ought to", "is unacceptable", "is appropriate"],
         0.5, "Normative implication"),
    ]
    for patterns, weight, label in pattern_rules:
        if any(p in text_lower for p in patterns):
            risk_score += weight
            risk_sources.append(label)

    # LLM-based scoring
    bias_prompt = f"Rate the cultural or ethical bias of the following text on a scale from 0 to 1:\n\n{content}\n\nScore:"
    llm_score_raw = llm.invoke(bias_prompt).content.strip()

    # Robust parse: a bare float() on the raw reply fails whenever the model
    # adds any surrounding text, which silently discarded the LLM score.
    llm_score = _extract_score(llm_score_raw)
    if llm_score is not None:
        # Clamp: the prompt requests [0, 1] but the model may not comply.
        llm_score = max(0.0, min(llm_score, 1.0))
        risk_score += llm_score
        risk_sources.append(f"LLM bias score: {llm_score}")
    # else: fall back to the pattern-based score only, as before.

    # Normalize and classify
    overall_risk = min(risk_score, 1.0)

    if overall_risk <= 0.3:
        risk_level = "Basso"
    elif overall_risk <= 0.7:
        risk_level = "Medio"
    else:
        risk_level = "Alto"

    # Automatic reformulation when the risk is high
    revised_response = None
    revision_suggestion = None
    if overall_risk > 0.6:
        revision_suggestion = "Ethical risk detected. Rewrite to reduce bias and improve neutrality."
        revision_prompt = f"""Rewrite this to reduce ethical bias and improve neutrality:
Original: {content}
Rewritten:"""
        revised_response = llm.invoke(revision_prompt).content.strip()

    return {
        "overall_risk": round(overall_risk, 2),
        "risk_level": risk_level,
        "risk_sources": risk_sources,
        "revision_suggestion": revision_suggestion,
        "revised_response": revised_response,
    }
|
| 103 |
|
# Example prompt: sample query for manually exercising the ethics pipeline.
prompt = "Discuss the potential risks of generative artificial intelligence in the context of medicine."