| import pandas as pd
|
| import numpy as np
|
| from config import *
|
|
|
|
|
# Display label -> (normalized score column, raw source column).
# Built from an explicit pair list; insertion order is preserved.
METRIC_MAP = dict([
    ('Textbook', ('norm_Textbook', 'Textbook')),
    ('Pop Culture', ('norm_PopCulture', 'Pop Culture')),
    ('World Model', ('Composite_WorldModel', 'Composite_WorldModel')),
    ('Instruction', ('norm_Instruction', 'W/10-Adherence')),
    ('Writing Style', ('norm_Style', 'avg_writing_style_score')),
    ('Originality', ('norm_Originality', 'originality_score')),
    ('Dialogue', ('gauss_Dialogue', 'Dialogue_Percentage')),
    ('Unbound', ('Composite_Unbound', 'Composite_Unbound')),
    ('NSFW Tone', ('norm_NSFW', 'avg_nsfw_score')),
    ('Dark Tone', ('norm_Dark', 'avg_dark_score')),
    ('Redundancy', ('Composite_Redundancy', 'Composite_Redundancy')),
    ('Hazardous', ('norm_Hazardous', 'Hazardous')),
    ('Entertainment', ('norm_Entertainment', 'Entertainment')),
    ('Length Acc', ('inv_LengthErr', 'avg_length_error_pct')),
    ('VerbNoun', ('gauss_VerbNoun', 'Verb_to_Noun_Ratio')),
])
|
|
|
# Preset name -> metric weights consumed by ScoringEngine.calculate_weighted_score.
# Keys must be METRIC_MAP keys; each preset's weights sum to 1.0.
# The last two entries use 'special_type' instead and are routed to the
# balanced / harmonic scorers in ScoringEngine.calculate_all.
# NOTE(review): insertion order is load-bearing — Score_ columns are created
# in this order, and "🌌 Divine RP" must be computed before the derived
# "⚡ Efficiency King" score. Do not reorder.
PRESET_CONFIGS = {
    # Roleplay all-rounder: writing style and dialogue dominate.
    "🌌 Divine RP": {
        'Textbook': 0.12, 'Pop Culture': 0.08, 'World Model': 0.10,
        'Instruction': 0.10, 'Writing Style': 0.25, 'Originality': 0.10,
        'Dialogue': 0.15, 'Unbound': 0.05, 'Redundancy': 0.05
    },
    # NSFW fiction: 'Unbound' (refusal-resistance) is the largest factor.
    "🌶️ Erotic Storyteller": {
        'World Model': 0.10, 'Instruction': 0.05, 'Writing Style': 0.15,
        'Originality': 0.05, 'Dialogue': 0.15, 'Unbound': 0.30,
        'NSFW Tone': 0.15, 'Redundancy': 0.05
    },
    # Pure factual/logical capability; minimal creative weighting.
    "🤖 T-800 Logic": {
        'Textbook': 0.40, 'World Model': 0.35, 'Instruction': 0.20, 'Redundancy': 0.05
    },
    # Prose quality above all.
    "✒️ Literary Virtuoso": {
        'Writing Style': 0.35, 'Originality': 0.30, 'Redundancy': 0.15,
        'Instruction': 0.10, 'Dialogue': 0.10
    },
    # World/lore consistency for game-mastering use.
    "🎲 Dungeon Master": {
        'World Model': 0.30, 'Textbook': 0.15, 'Pop Culture': 0.15,
        'Instruction': 0.20, 'Originality': 0.10, 'Dialogue': 0.10
    },
    # Grim fiction: dark tone plus willingness to cover hazardous topics.
    "🌑 Dark Novelist": {
        'Dark Tone': 0.25, 'Writing Style': 0.25, 'Hazardous': 0.15,
        'Originality': 0.20, 'Unbound': 0.15
    },
    # Penalizes repetitive, formulaic output ("slop").
    "🧼 Anti-Slop": {
        'Originality': 0.45, 'Redundancy': 0.35, 'Writing Style': 0.10, 'Instruction': 0.10
    },
    # Terse, on-task assistant behavior.
    "🎯 Concise Assistant": {
        'Instruction': 0.35, 'Redundancy': 0.30, 'Textbook': 0.20,
        'World Model': 0.10, 'Dialogue': 0.05
    },
    # Pop-culture knowledge and entertainment value.
    "🎪 Entertainment Savant": {
        'Pop Culture': 0.40, 'Entertainment': 0.25, 'Instruction': 0.15,
        'Writing Style': 0.10, 'Dialogue': 0.10
    },
    # Academic knowledge with low refusal rates.
    "🔬 Unfiltered Scholar": {
        'Textbook': 0.30, 'Hazardous': 0.25, 'Unbound': 0.20,
        'Instruction': 0.15, 'Originality': 0.05, 'Redundancy': 0.05
    },

    # Special presets: scored by ScoringEngine._calculate_special_score
    # instead of a weighted average. BALANCE_METRICS_LIST comes from the
    # `config` star import — presumably a list of normalized metric column
    # names; verify against config.py.
    "💎 Perfect Balance": {
        'special_type': 'balanced',
        'metrics': BALANCE_METRICS_LIST
    },
    "⚖️ No Weak Spots": {
        'special_type': 'harmonic',
        'metrics': BALANCE_METRICS_LIST
    }
}
|
|
|
class ScoringEngine:
    """Compute preset leaderboard scores, an efficiency score, and badges.

    Works on a defensive copy of the supplied DataFrame; call
    ``calculate_all()`` to get the frame back with one ``Score_<preset>``
    column per PRESET_CONFIGS entry plus a ``Badges`` string column.
    """

    def __init__(self, df):
        # Copy so the caller's frame is never mutated by scoring.
        self.df = df.copy()

    def calculate_all(self):
        """Compute every preset score, the efficiency score, and badges.

        Returns:
            The internal DataFrame with all ``Score_*`` columns rounded to
            three decimals and a ``Badges`` column. Returned unchanged when
            the frame is empty.
        """
        if self.df.empty:
            return self.df

        print("🧮 Calculating scores...")

        for preset_name, config in PRESET_CONFIGS.items():
            col_name = f"Score_{preset_name}"

            if isinstance(config, dict) and 'special_type' in config:
                if config['special_type'] == 'balanced':
                    self.df[col_name] = self._calculate_balanced_score(config['metrics'])
                elif config['special_type'] == 'harmonic':
                    self.df[col_name] = self._calculate_harmonic_score(config['metrics'])
            else:
                self.df[col_name] = self.calculate_weighted_score(config)

        self._add_efficiency_score()
        self._generate_badges_vectorized()

        score_cols = [c for c in self.df.columns if c.startswith("Score_")]
        self.df[score_cols] = self.df[score_cols].round(3)

        return self.df

    def _add_efficiency_score(self):
        """Derive 'Score_⚡ Efficiency King' (quality per parameter).

        Formula: Divine RP score / params**0.4, scaled by 10. The sub-linear
        exponent keeps small models competitive. Rows without a positive
        parameter count get 0. Skipped entirely when either required column
        is missing (matching the original inline behavior: no column is
        created in that case).
        """
        eff_col = "Score_⚡ Efficiency King"
        base_score_col = "Score_🌌 Divine RP"

        if base_score_col not in self.df.columns or 'Total Parameters' not in self.df.columns:
            return

        params = self.df['Total Parameters']
        base_score = self.df[base_score_col]

        valid_mask = (params > 0) & params.notna()

        self.df[eff_col] = np.nan
        if valid_mask.any():
            self.df.loc[valid_mask, eff_col] = (
                base_score.loc[valid_mask] / np.power(params.loc[valid_mask], 0.4)
            ) * 10

        self.df[eff_col] = self.df[eff_col].fillna(0)

    def calculate_weighted_score(self, weights_dict):
        """Weighted average of normalized metric columns.

        Metrics whose column is absent, or NaN for a given row, are dropped
        from that row's average (the score is re-normalized by the weight
        actually available). Rows where the available weight ratio falls
        below INSUFFICIENT_DATA_THRESHOLD are multiplied by
        INSUFFICIENT_DATA_PENALTY. Global penalty columns are applied last.

        Args:
            weights_dict: mapping of METRIC_MAP keys to non-negative weights.

        Returns:
            pd.Series of scores aligned to ``self.df.index``.

        Raises:
            ValueError: empty mapping, unknown metric keys, negative
                weights, or a non-positive weight total.
        """
        if not weights_dict:
            raise ValueError("weights_dict cannot be empty")

        unknown_keys = [k for k in weights_dict.keys() if k not in METRIC_MAP]
        if unknown_keys:
            raise ValueError(f"Unknown metrics in weights: {unknown_keys}")

        if any(w < 0 for w in weights_dict.values()):
            raise ValueError("Weights cannot be negative")

        total_preset_weight = sum(weights_dict.values())
        # BUGFIX: an all-zero weight total previously slipped past validation
        # and produced NaN/inf ratios below; fail loudly instead.
        if total_preset_weight <= 0:
            raise ValueError("Weights must sum to a positive total")

        weighted_sum = pd.Series(0.0, index=self.df.index)
        total_valid_weight = pd.Series(0.0, index=self.df.index)

        for key, weight in weights_dict.items():
            norm_col, _ = METRIC_MAP[key]
            if norm_col not in self.df.columns:
                continue

            values = self.df[norm_col]
            mask = values.notna()

            weighted_sum[mask] += values[mask] * weight
            total_valid_weight[mask] += weight

        # Re-normalize per row by the weight that actually had data;
        # rows with no data at all score 0.
        final_score = weighted_sum / total_valid_weight.replace(0, np.nan)
        final_score = final_score.fillna(0.0)

        # Penalize rows where too little of the preset's weight was covered.
        valid_weight_ratio = total_valid_weight / total_preset_weight
        insufficient_mask = valid_weight_ratio < INSUFFICIENT_DATA_THRESHOLD
        final_score[insufficient_mask] *= INSUFFICIENT_DATA_PENALTY

        self._apply_global_penalties(final_score)
        return final_score

    def _calculate_balanced_score(self, metric_keys):
        """Hybrid score: sqrt(min) * sqrt(geometric mean)."""
        return self._calculate_special_score(metric_keys, method='hybrid')

    def _calculate_harmonic_score(self, metric_keys):
        """Harmonic mean across the given metric columns."""
        return self._calculate_special_score(metric_keys, method='harmonic')

    def _calculate_special_score(self, metric_keys, method):
        """Shared core for the 'balanced' and 'harmonic' preset scores.

        Missing columns are skipped entirely. Missing values are imputed
        with a blend of a pessimistic 0.3 baseline and the column median,
        then everything is clipped to [0.1, 1.0] so logarithms and
        reciprocals stay finite.

        Args:
            metric_keys: candidate column names (only existing ones used).
            method: 'hybrid' or 'harmonic'.

        Returns:
            pd.Series aligned to ``self.df.index`` (all zeros when none of
            the requested columns exist).

        Raises:
            ValueError: on an unrecognized ``method``.
        """
        cols_to_use = [col for col in metric_keys if col in self.df.columns]
        if not cols_to_use:
            return pd.Series(0.0, index=self.df.index)

        subset = self.df[cols_to_use].copy()

        # Impute NaNs toward a 0.3 baseline so absent data neither tanks
        # nor inflates the aggregate.
        for col in subset.columns:
            col_median = subset[col].median()
            if pd.isna(col_median) or col_median <= 0:
                fill_val = 0.3
            else:
                fill_val = (0.3 + col_median) / 2
            subset[col] = subset[col].fillna(fill_val)

        subset = subset.clip(lower=0.1, upper=1.0)

        if method == 'hybrid':
            # sqrt(min) * sqrt(geometric mean): punishes a single weak
            # metric harder than a plain mean, softer than min alone.
            min_score = subset.min(axis=1)
            geom_score = np.exp(np.log(subset).mean(axis=1))
            final_score = np.sqrt(min_score) * np.sqrt(geom_score)
        elif method == 'harmonic':
            n = len(cols_to_use)
            sum_inverse = (1.0 / subset).sum(axis=1)
            # The 0.1 clip floor guarantees sum_inverse > 0; keep the guard
            # anyway for safety.
            values = np.divide(n, sum_inverse, out=np.zeros_like(sum_inverse), where=sum_inverse != 0)
            # BUGFIX: return an index-aligned Series like the 'hybrid'
            # branch instead of a bare ndarray.
            final_score = pd.Series(values, index=self.df.index)
        else:
            # BUGFIX: an unknown method previously fell through to an
            # UnboundLocalError on final_score.
            raise ValueError(f"Unknown special score method: {method!r}")

        self._apply_global_penalties(final_score)
        return final_score

    def _apply_global_penalties(self, score_series):
        """Multiply penalty factor columns into ``score_series`` IN PLACE.

        Missing penalty values default to 1.0 (no penalty); missing penalty
        columns are skipped.
        """
        if 'penalty_repetition' in self.df.columns:
            score_series *= self.df['penalty_repetition'].fillna(1.0)
        if 'penalty_thinking' in self.df.columns:
            score_series *= self.df['penalty_thinking'].fillna(1.0)

    def _generate_badges_vectorized(self):
        """Build the 'Badges' emoji string column from flag/threshold columns.

        Each missing source column simply skips its badge; all checks are
        vectorized over the frame.
        """
        badges = pd.Series("", index=self.df.index)

        if 'Is_New' in self.df:
            badges += np.where(self.df['Is_New'].fillna(False).astype(bool), "🆕 ", "")

        if 'Is Thinking Model' in self.df:
            badges += np.where(self.df['Is Thinking Model'].fillna(False).astype(bool), "🧠 ", "")

        if 'norm_NSFW' in self.df:
            # NaN compares False, so rows without data get no badge.
            badges += np.where(self.df['norm_NSFW'] > NSFW_BADGE_THRESHOLD, "🔞 ", "")

        if 'Repetition Interrupts' in self.df:
            badges += np.where(self.df['Repetition Interrupts'] >= 1.0, "📉 ", "")

        params = self.df.get('Total Parameters', pd.Series(np.nan, index=self.df.index))
        has_params = params.notna() & (params > 0)

        badges += np.where(has_params & (params <= POCKET_MODEL_THRESHOLD), "🤏 ", "")
        badges += np.where(has_params & (params >= GIANT_MODEL_THRESHOLD), "🐳 ", "")

        self.df['Badges'] = badges.str.strip()