Spaces:

kgdrathan
/

explainer-env-dashboard

Sleeping

explainer-env-dashboard / src /lib /rewards.ts

Gnan Deep Rathan K

Deploy dashboard without binary lockfile

1b83e76 28 days ago

4.13 kB

	import type { ExplainerObservation, RewardEntry } from "./types";

	const NON_REWARD_KEYS = new Set([
	"step",
	"phase",
	"tool",
	"source_count",
	"error",
	"explore_steps_used",
	"repair_steps_used",
	"sandbox_message",
	"error_codes",
	]);

	const VISIBLE: Record<string, string[]> = {
	explore: ["query_quality", "evidence_quality", "information_gain", "efficiency", "explore_total"],
	generate: ["validity", "task_alignment", "structure", "research_usage", "generate_total"],
	repair: ["repair_success", "fixed_prior_errors", "changed_code", "repair_total"],
	};

	const FALLBACK_WEIGHTS: Record<string, Record<string, number>> = {
	explore: {
	query_quality: 0.2,
	evidence_quality: 0.25,
	information_gain: 0.4,
	efficiency: 0.15,
	},
	generate: {
	validity: 0.15,
	task_alignment: 0.3,
	structure: 0.3,
	research_usage: 0.25,
	},
	repair: {
	repair_success: 0.6,
	fixed_prior_errors: 0.2,
	changed_code: 0.2,
	},
	};

	export function rewardComponents(obs: ExplainerObservation): Record<string, number \| string> {
	const meta = obs.metadata \|\| {};
	const all: Record<string, number> = {};
	for (const [k, v] of Object.entries(meta)) {
	if (NON_REWARD_KEYS.has(k)) continue;
	if (typeof v === "number" && !Number.isNaN(v)) all[k] = v;
	}
	const phase = (meta.phase as string) \|\| obs.phase;
	const allowed = VISIBLE[phase];
	if (allowed) {
	const visible: Record<string, number> = {};
	for (const k of allowed) if (k in all) visible[k] = all[k];
	if (Object.keys(visible).length) return visible;
	}
	return Object.keys(all).length ? all : parseRewardComponents(obs.feedback);
	}

	function parseRewardComponents(feedback: string): Record<string, number \| string> {
	const rewardMatch = feedback.match(/Reward:\s(\{[\s\S]?\})(?:\n\|$)/);
	if (rewardMatch?.[1]) {
	const components: Record<string, number> = {};
	for (const match of rewardMatch[1].matchAll(
	/['"]?([A-Za-z0-9_]+)['"]?\s:\s(-?\d+(?:\.\d+)?)/g,
	)) {
	components[match[1]] = Number(match[2]);
	}
	if (Object.keys(components).length) return components;
	}

	const kvMatch = feedback.match(/Reward:\s*([^\n]+)/);
	if (!kvMatch?.[1]) return {};

	const components: Record<string, number \| string> = {};
	for (const part of kvMatch[1].split(",")) {
	const [rawKey, rawValue] = part.split("=");
	if (!rawKey \|\| rawValue === undefined) continue;
	const key = rawKey.trim();
	const value = rawValue.trim();
	const numeric = Number(value);
	components[key] = Number.isFinite(numeric) ? numeric : value;
	}
	return components;
	}

	export function totalForPhase(
	phase: string,
	components: Record<string, number \| string>,
	): number \| null {
	const totalKey =
	phase === "explore"
	? "explore_total"
	: phase === "generate"
	? "generate_total"
	: phase === "repair"
	? "repair_total"
	: null;
	if (totalKey && typeof components[totalKey] === "number") {
	return components[totalKey] as number;
	}
	return weightedFallbackTotal(phase, components);
	}

	function weightedFallbackTotal(
	phase: string,
	components: Record<string, number \| string>,
	): number \| null {
	const weights = FALLBACK_WEIGHTS[phase];
	if (!weights) return null;

	let total = 0;
	let usedWeight = 0;
	for (const [key, weight] of Object.entries(weights)) {
	const value = components[key];
	if (typeof value !== "number") continue;
	total += value * weight;
	usedWeight += weight;
	}

	if (usedWeight === 0) return null;
	return Math.max(0, Math.min(1, total / usedWeight));
	}

	export const SUCCESS_SCORE_THRESHOLD = 0.3;
	const MAX_EXPLORE_STEPS = 6;
	const MAX_EXPLORE_REWARD = 1.0;
	const MAX_GENERATE_REWARD = 1.0;

	export function normalizedEpisodeScore(rewards: RewardEntry[]): number {
	const totalReward = rewards
	.map((r) => r.total)
	.filter((total): total is number => typeof total === "number")
	.reduce((acc, total) => acc + total, 0);
	const maxPossible = MAX_EXPLORE_STEPS * MAX_EXPLORE_REWARD + MAX_GENERATE_REWARD;
	return Math.max(0, Math.min(1, totalReward / maxPossible));
	}