Labexperiment / server /rubric.py
Sbhimraj's picture
Add application file
aab0192
Raw
History Blame Contribute Delete
10.4 kB
"""
server/rubric.py -- Multi-component reward rubric for the Hypothesis Lab.
Components:
1. accuracy_score (0.0-1.0) -- how close is the hypothesis to ground truth
2. precision_bonus (+0.10) -- hypothesis contains quantitative claims
3. calibration_score (0.0-0.20) -- expressed confidence matches accuracy
4. efficiency_bonus (+0.07 or +0.15) -- submitted with budget to spare and sufficient accuracy
5. contradiction_penalty (-0.50) -- hypothesis contradicts hard constraints
Per-step info-gain scoring is handled by InfoGainTracker.
"""
from __future__ import annotations
import re
import math
from dataclasses import dataclass, field
from typing import Any, Optional
import numpy as np
from .causal_world import CausalWorld
@dataclass
class RubricResult:
    """Per-component breakdown produced when a hypothesis is scored.

    The five numeric fields are the individual rubric components; ``feedback``
    and ``ground_truth`` are free-text fields that are carried alongside the
    scores but are not part of :meth:`to_dict`.
    """

    accuracy_score: float = 0.0
    precision_bonus: float = 0.0
    calibration_score: float = 0.0
    efficiency_bonus: float = 0.0
    contradiction_penalty: float = 0.0
    feedback: str = ""
    ground_truth: str = ""

    @property
    def total(self) -> float:
        """Sum of the five numeric components (the penalty is stored negative)."""
        # Accumulate left-to-right in field order so the floating-point result
        # does not depend on the summation strategy.
        running = self.accuracy_score
        running = running + self.precision_bonus
        running = running + self.calibration_score
        running = running + self.efficiency_bonus
        running = running + self.contradiction_penalty
        return running

    def to_dict(self) -> dict[str, float]:
        """Return every numeric component plus ``"total"``, rounded to 4 places."""
        component_names = (
            "accuracy_score",
            "precision_bonus",
            "calibration_score",
            "efficiency_bonus",
            "contradiction_penalty",
        )
        summary = {name: round(getattr(self, name), 4) for name in component_names}
        summary["total"] = round(self.total, 4)
        return summary
class InfoGainTracker:
    """
    Tracks experiment history and computes per-step information gain rewards.
    Also detects redundant experiments.

    Attributes:
        cumulative_gain: Running sum of all non-negative rewards handed out.
        redundant_count: Number of experiments flagged as redundant.
    """

    def __init__(self) -> None:
        # How many times each (cause, effect) edge has been probed.
        self._edge_counts: dict[tuple[str, str], int] = {}
        # Which experiment types have been used on each edge.
        self._edge_types: dict[tuple[str, str], set[str]] = {}
        self.cumulative_gain: float = 0.0
        self.redundant_count: int = 0

    def record_and_score(
        self,
        cause: str,
        effect: str,
        exp_type: str,
        result_value: Any,
    ) -> tuple[float, bool]:
        """
        Record an experiment and return (reward, is_redundant).

        Reward schedule (per (cause, effect) edge):
        - First observation of an edge: +0.20
        - Second (different exp type = triangulation bonus): +0.25
        - Second (same type): +0.12
        - Third: +0.05
        - Fourth and later: -0.10 (redundant penalty)

        Args:
            cause: Name of the manipulated/observed cause variable.
            effect: Name of the effect variable.
            exp_type: Label of the experiment type that was run.
            result_value: Outcome of the experiment; accepted for interface
                compatibility but not used in the scoring.

        Returns:
            The reward rounded to 4 decimals, and whether this experiment was
            counted as redundant (the edge had already been probed 3+ times).
        """
        key = (cause, effect)
        prior = self._edge_counts.get(key, 0)
        seen_types = self._edge_types.setdefault(key, set())
        # Must be decided before the current type is added to the set.
        is_new_type = exp_type not in seen_types

        self._edge_counts[key] = prior + 1
        seen_types.add(exp_type)

        is_redundant = prior >= 3
        if prior == 0:
            reward = 0.20
        elif prior == 1:
            reward = 0.25 if is_new_type else 0.12
        elif prior == 2:
            reward = 0.05
        else:
            reward = -0.10
            # Count each redundant experiment exactly once.
            self.redundant_count += 1

        # Penalties do not reduce the accumulated gain.
        self.cumulative_gain += max(reward, 0.0)
        return round(reward, 4), is_redundant
# (regex, reason) pairs. Each regex is searched in the lower-cased hypothesis;
# a match means the hypothesis makes a claim that contradicts the experimental
# setup. "no" is anchored with word boundaries so that words merely containing
# those letters ("nonlinear relationship", "know ... causal") are not
# mistaken for the claim that no relationship exists.
HARD_CONSTRAINTS = [
    (r"all variables.*independent", "Claiming all variables are independent contradicts the experimental setup"),
    (r"\bno\b.*relationship|\bno\b.*causal", "Claiming no relationships exist contradicts the experimental setup"),
]
# Vocabulary used by _accuracy_score to decide whether a hypothesis names the
# right functional form. Keys are looked up with a rule's ``rule_type`` or an
# interaction's ``interaction_type``; a form counts as mentioned when ANY of
# its entries occurs as a substring of the lower-cased hypothesis text.
# NOTE(review): because matching is by substring, short entries such as "if"
# or "sum" also match inside longer words (e.g. "differ", "assume").
# NOTE(review): "multiply" is listed under both "linear" and "multiplicative".
_RULE_KEYWORDS: dict[str, list[str]] = {
    # --- single-cause rule forms ---
    "linear": [
        "linear", "proportional", "slope", "times", "multiply",
        "increases", "decreases",
    ],
    "threshold": [
        "threshold", "above", "below", "greater", "less",
        "if", "when", "switch", "cutoff",
    ],
    "inverse": ["inverse", "inversely", "reciprocal", "divided", "1/"],
    "quadratic": [
        "quadratic", "squared", "parabol", "x^2", "x²", "nonlinear",
        "curve", "polynomial",
    ],
    "exponential": [
        "exponential", "exp(", "growth", "decay", "e^", "geometric",
    ],
    "logarithmic": [
        "logarithm", "log(", "ln(", "log ", "diminishing returns",
    ],
    "saturating": [
        "saturating", "saturat", "michaelis", "plateau", "asymptote",
        "levels off", "diminishing", "vmax",
    ],
    "piecewise_linear": [
        "piecewise", "breakpoint", "knot", "changes slope",
        "two-segment", "regime change", "kink",
    ],
    # --- forms describing how several causes combine ---
    "additive": [
        "additive", "sum", "combines", "both contribute", "joint",
    ],
    "multiplicative": [
        "multiplicative", "product", "multiply", "synerg", "interaction",
    ],
    "min": ["minimum", "bottleneck", "limiting factor", "min("],
    "max": ["maximum", "dominant", "max("],
}
def _accuracy_score(hypothesis: str, world: CausalWorld) -> float:
    """Rate (0.0-1.0) how much of the world's ground truth the hypothesis covers.

    Every rule and every interaction in ``world`` is worth up to 1.0 point:

    * rule: 0.4 for naming both cause and effect, plus 0.3 for a keyword of the
      rule's type, plus 0.3 for quoting the rule's key parameter (absolute
      value rounded to one decimal). The two extras are only considered once
      cause and effect were both found.
    * interaction: 0.3 for the effect with at least one cause, 0.2 more for the
      effect with both causes, and 0.5 for a keyword of the interaction type.

    A variable counts as named when its lower-cased name, or the first four
    characters of it, occurs in the lower-cased hypothesis. A blank hypothesis
    scores 0.0; a world without rules or interactions scores 0.5.
    """
    if hypothesis.strip() == "":
        return 0.0

    lowered = hypothesis.lower()
    rules = list(world.rules)
    item_count = len(rules) + len(world.interactions)
    if item_count == 0:
        return 0.5

    def mentions(name: str) -> bool:
        # Full name or its 4-character prefix appearing anywhere in the text.
        return name in lowered or name[:4] in lowered

    def uses_vocabulary(kind) -> bool:
        # True when any keyword registered for this form appears in the text.
        return any(term in lowered for term in _RULE_KEYWORDS.get(kind, []))

    score = 0.0

    for rule in rules:
        cause_seen = mentions(rule.cause.lower())
        effect_seen = mentions(rule.effect.lower())
        if not cause_seen or not effect_seen:
            continue
        score += 0.4
        if uses_vocabulary(rule.rule_type):
            score += 0.3
        param = _key_param_for_rule(rule)
        if param is None:
            continue
        # The number is searched in the original (not lower-cased) text.
        if str(round(abs(param), 1)) in hypothesis:
            score += 0.3

    for inter in world.interactions:
        first_seen = mentions(inter.cause1.lower())
        second_seen = mentions(inter.cause2.lower())
        effect_seen = mentions(inter.effect.lower())
        if effect_seen:
            if first_seen or second_seen:
                score += 0.3
            if first_seen and second_seen:
                score += 0.2
        if uses_vocabulary(inter.interaction_type):
            score += 0.5

    # item_count is at least 1 here, so the division is always defined.
    return min(score / float(item_count), 1.0)
def _key_param_for_rule(rule) -> Optional[float]:
    """Look up the one parameter of ``rule`` that a hypothesis should quote.

    Reads ``rule.rule_type`` and ``rule.params``. Returns ``None`` when the
    rule type has no designated parameter, or when the designated parameter is
    absent from ``rule.params``.
    """
    kind = rule.rule_type
    params = rule.params

    # Parameter name -> rule types for which it is the defining quantity.
    param_for_kinds = (
        ("a", ("linear", "inverse", "quadratic", "logarithmic")),
        ("threshold", ("threshold",)),
        ("k", ("exponential",)),
        ("v_max", ("saturating",)),
        ("knot", ("piecewise_linear",)),
    )
    for param_name, kinds in param_for_kinds:
        if kind in kinds:
            return params.get(param_name)
    return None
def _precision_bonus(text: str) -> float:
    """Award 0.10 when the text quotes at least two non-trivial numbers.

    A number is an optionally signed run of digits with an optional decimal
    part; the bare tokens "0" and "1" are not counted.
    """
    trivial = ("0", "1")
    quoted = 0
    for match in re.finditer(r"-?\d+\.?\d*", text):
        if match.group(0) not in trivial:
            quoted += 1
    if quoted >= 2:
        return 0.10
    return 0.0
def _calibration_score(expressed: Optional[float], actual: float) -> float:
    """Reward stated confidence that is close to the achieved accuracy.

    Returns 0.20 when the two agree exactly, falling linearly to 0.0 once they
    differ by 0.5 or more. Returns 0.0 when no confidence was given.
    """
    if expressed is None:
        return 0.0
    gap = abs(expressed - actual)
    scaled = 0.20 * (1.0 - gap / 0.5)
    # Clamp at zero: large gaps would otherwise produce a negative score.
    return scaled if scaled > 0.0 else 0.0
def _constraint_penalty(text: str) -> float:
    """Return -0.50 if the lower-cased text matches any HARD_CONSTRAINTS regex, else 0.0."""
    lowered = text.lower()
    violated = any(
        re.search(regex, lowered) for regex, _reason in HARD_CONSTRAINTS
    )
    return -0.50 if violated else 0.0
def _build_feedback(result: RubricResult) -> str:
    """Turn the component scores of ``result`` into a one-line feedback message.

    One sentence is always emitted for accuracy, precision and efficiency.
    A calibration sentence is added only when the calibration score is
    positive, and a warning only when a contradiction penalty was applied.
    Sentences are joined with single spaces.
    """
    accuracy = result.accuracy_score
    if accuracy >= 0.75:
        accuracy_note = "Strong accuracy -- you identified most causal relationships."
    elif accuracy >= 0.40:
        accuracy_note = "Partial accuracy -- some relationships identified correctly."
    else:
        accuracy_note = "Low accuracy -- try running more diverse experiments."

    precision_note = (
        "Good precision -- quantitative claims detected."
        if result.precision_bonus > 0
        else "Tip: include numerical values (slopes, thresholds) for precision bonus."
    )
    efficiency_note = (
        "Efficient submission -- well-timed."
        if result.efficiency_bonus > 0
        else "Tip: submit earlier when confident to earn efficiency bonus."
    )
    messages = [accuracy_note, precision_note, efficiency_note]

    calibration = result.calibration_score
    if calibration >= 0.15:
        messages.append("Well-calibrated confidence.")
    elif calibration > 0:
        messages.append("Confidence calibration could improve.")

    if result.contradiction_penalty < 0:
        messages.append("WARNING: hypothesis contradicts known physical constraints.")

    return " ".join(messages)
def score_hypothesis(
    hypothesis_text: str,
    hypothesis_equations: Optional[list[str]],
    confidence: Optional[float],
    world: CausalWorld,
    budget_remaining: int,
    budget_total: int,
) -> RubricResult:
    """
    Grade a submitted hypothesis against the ground-truth world.

    The free-text hypothesis and any equations are concatenated (space
    separated) and scored as one string. The efficiency bonus depends on the
    fraction of the experiment budget still unspent and on the accuracy score:
    0.15 for >= 30% budget left with accuracy >= 0.60, otherwise 0.07 for
    >= 15% budget left with accuracy >= 0.40, otherwise nothing.

    Returns a RubricResult holding every component score, the feedback text
    and the world's ground-truth summary.
    """
    pieces = [hypothesis_text or ""]
    if hypothesis_equations:
        pieces.append(" ".join(hypothesis_equations))
    combined = " ".join(pieces)

    accuracy = _accuracy_score(combined, world)
    precision = _precision_bonus(combined)
    calibration = _calibration_score(confidence, accuracy)
    penalty = _constraint_penalty(combined)
    outcome = RubricResult(
        accuracy_score=accuracy,
        precision_bonus=precision,
        calibration_score=calibration,
        contradiction_penalty=penalty,
    )

    # Guard the denominator so a zero budget cannot divide by zero.
    budget_fraction = budget_remaining / max(budget_total, 1)
    # (min budget fraction, min accuracy, bonus) -- first satisfied tier wins.
    tiers = ((0.30, 0.60, 0.15), (0.15, 0.40, 0.07))
    for min_fraction, min_accuracy, bonus in tiers:
        if budget_fraction >= min_fraction and outcome.accuracy_score >= min_accuracy:
            outcome.efficiency_bonus = bonus
            break

    outcome.ground_truth = world.ground_truth_summary()
    outcome.feedback = _build_feedback(outcome)
    return outcome