LaxmiVarshithaCH
Deploy NeuralKarma to HuggingFace Spaces
553a798
"""
NeuralKarma — Karma Scoring Engine
Core inference engine that computes multi-dimensional karma scores:
- 5-axis scoring (prosociality, fairness/justice, harm avoidance, virtue, deontology)
- Temporal decay using exponential half-life model
- Ripple effect propagation through social graph
- Aggregate karma with weighted combination
"""
import math
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Dict, List, Optional, Tuple
import joblib
import numpy as np
MODEL_DIR = Path(__file__).parent / "models"
# Axis weights for aggregate score (sum to 1.0)
# Prosociality and harm avoidance weighted higher for social impact
AXIS_WEIGHTS = {
"prosociality": 0.25,
"harm_avoidance": 0.20, # derived from commonsense
"fairness": 0.20, # derived from justice
"virtue": 0.20, # derived from virtue
"duty": 0.15, # derived from deontology
}
# Map model names to display axis names
MODEL_TO_AXIS = {
"prosociality": "prosociality",
"commonsense": "harm_avoidance",
"justice": "fairness",
"virtue": "virtue",
"deontology": "duty",
}
# Half-life for temporal decay (in hours)
# After 168 hours (1 week), a score decays to 50%
KARMA_HALF_LIFE_HOURS = 168.0
# Decay constant λ = ln(2) / half_life
DECAY_LAMBDA = math.log(2) / KARMA_HALF_LIFE_HOURS
# Ripple effect damping factor (0-1)
# Each hop in the social graph reduces the ripple by this factor
RIPPLE_DAMPING = 0.6
# Maximum ripple depth
MAX_RIPPLE_DEPTH = 3
class KarmaScorer:
"""
Multi-axis karma scoring engine. Loads trained ML models and computes
real ethical impact scores for input text.
"""
def __init__(self):
self.models = {}
self.vectorizers = {}
self._loaded = False
def load_models(self):
"""Load all trained models from disk."""
if self._loaded:
return
print("Loading NeuralKarma models...")
model_files = [
("prosociality", "prosociality_model.joblib", "prosociality_vectorizer.joblib"),
("commonsense", "ethics_commonsense_model.joblib", "ethics_commonsense_vectorizer.joblib"),
("deontology", "ethics_deontology_model.joblib", "ethics_deontology_vectorizer.joblib"),
("justice", "ethics_justice_model.joblib", "ethics_justice_vectorizer.joblib"),
("virtue", "ethics_virtue_model.joblib", "ethics_virtue_vectorizer.joblib"),
("utilitarianism", "ethics_utilitarianism_model.joblib", "ethics_utilitarianism_vectorizer.joblib"),
]
for name, model_file, vec_file in model_files:
model_path = MODEL_DIR / model_file
vec_path = MODEL_DIR / vec_file
if model_path.exists() and vec_path.exists():
self.models[name] = joblib.load(model_path)
self.vectorizers[name] = joblib.load(vec_path)
print(f" [OK] Loaded {name} model")
else:
print(f" [WARNING] Model not found: {name} (will skip this axis)")
self._loaded = True
print(f" Loaded {len(self.models)} models total")
def score_text(self, text: str) -> Dict[str, float]:
"""
Score a text input across all ethical axes.
Returns dict with:
- Individual axis scores (0-100)
- Aggregate karma score (0-100)
- Raw probabilities
- Confidence level
"""
if not self._loaded:
self.load_models()
axis_scores = {}
raw_probs = {}
confidences = []
for model_name, model in self.models.items():
if model is None:
continue
vectorizer = self.vectorizers.get(model_name)
if vectorizer is None:
continue
try:
# Transform input text
X = vectorizer.transform([text])
# Get probability of positive class (ethical/prosocial)
if hasattr(model, "predict_proba"):
probs = model.predict_proba(X)[0]
# prob of positive class (index 1)
positive_prob = probs[1] if len(probs) > 1 else probs[0]
else:
# Fallback to decision function
decision = model.decision_function(X)[0]
positive_prob = 1 / (1 + math.exp(-decision)) # sigmoid
# Map model name to axis name
axis_name = MODEL_TO_AXIS.get(model_name, model_name)
# Scale to 0-100 with non-linear mapping for more spread
# Apply sigmoid-like stretching to avoid clustering around 50
stretched = self._stretch_score(positive_prob)
score = round(stretched * 100, 2)
score = max(0, min(100, score))
axis_scores[axis_name] = score
raw_probs[axis_name] = round(positive_prob, 4)
# Confidence = distance from 0.5 (higher = more confident)
confidence = abs(positive_prob - 0.5) * 2
confidences.append(confidence)
except Exception as e:
print(f" Warning: Error scoring with {model_name}: {e}")
axis_name = MODEL_TO_AXIS.get(model_name, model_name)
axis_scores[axis_name] = 50.0 # neutral fallback
raw_probs[axis_name] = 0.5
# Calculate aggregate karma score
aggregate = self.compute_aggregate(axis_scores)
# Overall confidence
avg_confidence = np.mean(confidences) if confidences else 0.0
return {
"axis_scores": axis_scores,
"aggregate_karma": aggregate,
"raw_probabilities": raw_probs,
"confidence": round(avg_confidence, 4),
"axes_evaluated": len(axis_scores),
"timestamp": datetime.now(timezone.utc).isoformat(),
}
def _stretch_score(self, prob: float) -> float:
"""
Apply non-linear stretching to probability to get better score distribution.
Uses a modified sigmoid that pushes values away from 0.5.
"""
# Center around 0
centered = (prob - 0.5) * 2 # [-1, 1]
# Apply cube root for spreading while preserving sign
if centered >= 0:
stretched = centered ** 0.7
else:
stretched = -((-centered) ** 0.7)
# Map back to [0, 1]
return (stretched + 1) / 2
def compute_aggregate(self, axis_scores: Dict[str, float]) -> float:
"""
Compute weighted aggregate karma score from individual axes.
Uses the AXIS_WEIGHTS configuration.
"""
total_weight = 0.0
weighted_sum = 0.0
for axis, weight in AXIS_WEIGHTS.items():
if axis in axis_scores:
weighted_sum += axis_scores[axis] * weight
total_weight += weight
if total_weight == 0:
return 50.0 # neutral
return round(weighted_sum / total_weight, 2)
def apply_temporal_decay(self, score: float, action_time: datetime) -> float:
"""
Apply exponential temporal decay to a karma score.
Formula: decayed_score = score × e^(-λ × t)
Where:
λ = ln(2) / half_life (decay constant)
t = elapsed hours since action
After 1 week (168h), the score decays to 50%.
After 2 weeks, 25%. After 1 month, ~6%.
"""
now = datetime.now(timezone.utc)
if action_time.tzinfo is None:
action_time = action_time.replace(tzinfo=timezone.utc)
elapsed_hours = (now - action_time).total_seconds() / 3600.0
elapsed_hours = max(0, elapsed_hours)
decay_factor = math.exp(-DECAY_LAMBDA * elapsed_hours)
return round(score * decay_factor, 2)
def compute_ripple_effect(
self,
source_score: float,
depth: int = 1,
connections: int = 3,
) -> Dict:
"""
Simulate how an ethical action's impact propagates through a social network.
At each hop:
- Impact is multiplied by RIPPLE_DAMPING
- Number of affected people grows by `connections` factor
- Total ripple is the sum of all hop impacts
Returns: {
total_impact: float,
hops: [{depth, impact_per_person, people_affected, cumulative}]
}
"""
hops = []
total_impact = 0.0
cumulative_people = 0
for d in range(1, min(depth + 1, MAX_RIPPLE_DEPTH + 1)):
# Impact decreases with each hop
impact_per_person = source_score * (RIPPLE_DAMPING ** d)
# People affected grows exponentially
people_at_depth = connections ** d
# Total impact at this depth
depth_impact = impact_per_person * people_at_depth
cumulative_people += people_at_depth
total_impact += depth_impact
hops.append({
"depth": d,
"impact_per_person": round(impact_per_person, 2),
"people_affected": people_at_depth,
"depth_total_impact": round(depth_impact, 2),
"cumulative_people": cumulative_people,
"cumulative_impact": round(total_impact, 2),
})
return {
"source_score": source_score,
"total_ripple_impact": round(total_impact, 2),
"total_people_reached": cumulative_people,
"damping_factor": RIPPLE_DAMPING,
"hops": hops,
}
def compute_karma_chain(self, actions: List[Dict]) -> List[Dict]:
"""
Analyze a chain of related actions and compute cascading karma effects.
Each action in the chain can amplify or attenuate subsequent scores.
Input: [{text, timestamp, parent_action_id}, ...]
Output: [{text, score, chain_modifier, effective_score}, ...]
"""
chain_results = []
chain_modifier = 1.0
for i, action in enumerate(actions):
# Score this action
result = self.score_text(action["text"])
base_score = result["aggregate_karma"]
# Apply chain modifier from previous actions
effective_score = base_score * chain_modifier
effective_score = max(0, min(100, effective_score))
# Update chain modifier based on this action
# Positive actions (>60) boost subsequent actions slightly
# Negative actions (<40) dampen subsequent actions
if base_score > 60:
chain_modifier = min(1.5, chain_modifier * 1.05)
elif base_score < 40:
chain_modifier = max(0.5, chain_modifier * 0.95)
else:
chain_modifier = chain_modifier * 0.99 # slight decay to neutral
chain_results.append({
"index": i,
"text": action["text"],
"base_score": base_score,
"axis_scores": result["axis_scores"],
"chain_modifier": round(chain_modifier, 4),
"effective_score": round(effective_score, 2),
"timestamp": action.get("timestamp", datetime.now(timezone.utc).isoformat()),
})
return chain_results
def get_karma_tier(self, score: float) -> Dict[str, str]:
"""
Map aggregate karma score to a tier with label and description.
"""
if score >= 90:
return {"tier": "S", "label": "Enlightened", "color": "#FFD700",
"description": "Exceptional ethical impact — a true force for good"}
elif score >= 75:
return {"tier": "A", "label": "Virtuous", "color": "#00E5FF",
"description": "Strong positive ethical presence"}
elif score >= 60:
return {"tier": "B", "label": "Benevolent", "color": "#69F0AE",
"description": "Consistently positive ethical behavior"}
elif score >= 45:
return {"tier": "C", "label": "Neutral", "color": "#B0BEC5",
"description": "Balanced — no strong ethical lean"}
elif score >= 30:
return {"tier": "D", "label": "Questionable", "color": "#FFB74D",
"description": "Actions raise ethical concerns"}
elif score >= 15:
return {"tier": "E", "label": "Harmful", "color": "#FF5252",
"description": "Significant negative ethical impact"}
else:
return {"tier": "F", "label": "Destructive", "color": "#D50000",
"description": "Severely harmful — urgent ethical concern"}
# Singleton instance
_scorer = None
def get_scorer() -> KarmaScorer:
"""Get or create the global KarmaScorer instance."""
global _scorer
if _scorer is None:
_scorer = KarmaScorer()
_scorer.load_models()
return _scorer