Spaces:

onenoly11
/

llm-coherence-auditor

Sleeping

App Files Files Community

llm-coherence-auditor / app.py

onenoly11

Upload folder using huggingface_hub

81c7838 verified 22 days ago

raw

history blame contribute delete

17 kB

	#!/usr/bin/env python3
	"""
	╔═══════════════════════════════════════════════════════════════════════════════╗
	║ LLM COHERENCE AUDITING FRAMEWORK - GRADIO SPACE ║
	║ The Flatline Truth Dashboard ║
	║ Quantum Pi Forge ║
	╠═══════════════════════════════════════════════════════════════════════════════╣
	║ Interactive Gradio app for auditing LLM preference stability under noise. ║
	║ Authors: Olofson & Grok (2025). Bradley-Terry + Plackett-Luce models. ║
	║ License: CC-BY-4.0 ║
	╚═══════════════════════════════════════════════════════════════════════════════╝
	"""

	import gradio as gr
	import numpy as np
	import matplotlib.pyplot as plt
	from typing import Tuple
	import warnings
	warnings.filterwarnings('ignore')

	# ============================================================================
	# CORE STATISTICAL MODELS
	# ============================================================================

	def bradley_terry_probability(lambda_param: float) -> float:
	    """
	    Bradley-Terry model: P(Y > X) = exp(λ) / (1 + exp(λ))

	    The logistic is evaluated through exp(-|λ|), which always lies in
	    [0, 1], so very large |λ| saturates to 0 or 1 instead of producing
	    inf / inf = nan.

	    Args:
	        lambda_param: Log-odds parameter (R - βN in the full model).
	            A NumPy array is also accepted and handled element-wise.

	    Returns:
	        Probability that Y is preferred over X (a float for scalar input,
	        an array of the same shape for array input)
	    """
	    log_odds = np.asarray(lambda_param, dtype=float)

	    # exp(-|λ|) equals exp(λ) for λ < 0 and exp(-λ) for λ >= 0.
	    decay = np.exp(-np.abs(log_odds))

	    # λ >= 0: 1 / (1 + exp(-λ));  λ < 0: exp(λ) / (1 + exp(λ)).
	    probability = np.where(log_odds >= 0, 1.0, decay) / (1.0 + decay)

	    return float(probability) if probability.ndim == 0 else probability


	def estimate_exchange_rate(
	    n_trials: int,
	    base_rate: float,
	    noise_std: float,
	    seed: int = 42
	) -> Tuple[float, float]:
	    """
	    Estimate Exchange Rate (R) via Maximum Likelihood Estimation
	    with injected Gaussian noise.

	    Args:
	        n_trials: Number of comparison trials (must be >= 1)
	        base_rate: True underlying exchange rate
	        noise_std: Standard deviation of Gaussian noise (temperature T)
	        seed: Random seed for reproducibility

	    Returns:
	        Tuple of (estimated_rate, standard_error)

	    Raises:
	        ValueError: If n_trials is smaller than 1.
	    """
	    n_trials = int(n_trials)
	    if n_trials < 1:
	        raise ValueError("n_trials must be at least 1")

	    # A private generator yields the same stream as np.random.seed(seed)
	    # would, but leaves the process-wide NumPy RNG untouched.
	    rng = np.random.RandomState(seed)

	    # Inject noise into each trial
	    noisy_rates = base_rate + rng.normal(0, noise_std, n_trials)

	    # MLE estimate is the mean of observed rates
	    estimated_rate = float(np.mean(noisy_rates))
	    standard_error = float(np.std(noisy_rates) / np.sqrt(n_trials))

	    return estimated_rate, standard_error


	def calculate_coherence(
	    n_trials: int,
	    base_rate: float,
	    noise_std: float,
	    seed: int = 42
	) -> float:
	    """
	    Calculate Coherence (κ) - the consistency of predictions under noise.

	    Coherence measures how often the same preference ranking is maintained
	    across repeated noisy trials.

	    Args:
	        n_trials: Number of comparison trials (must be >= 1)
	        base_rate: True underlying exchange rate
	        noise_std: Standard deviation of Gaussian noise
	        seed: Random seed

	    Returns:
	        Coherence score between 0 and 1

	    Raises:
	        ValueError: If n_trials is smaller than 1.
	    """
	    n_trials = int(n_trials)
	    if n_trials < 1:
	        raise ValueError("n_trials must be at least 1")

	    # Private generator: same draws as seeding the global NumPy RNG with
	    # `seed`, without mutating that shared state.
	    rng = np.random.RandomState(seed)

	    # Generate noisy log-odds, one per trial
	    noisy_rates = base_rate + rng.normal(0, noise_std, n_trials)

	    # The Bradley-Terry probability exp(λ) / (1 + exp(λ)) exceeds 0.5
	    # exactly when λ > 0, so the preference is read off the sign of the
	    # log-odds. This skips exp() entirely and cannot overflow.
	    trial_prefers_y = noisy_rates > 0
	    base_prefers_y = base_rate > 0

	    # Coherence: fraction of trials whose preference matches the base one
	    coherent_trials = np.count_nonzero(trial_prefers_y == base_prefers_y)

	    return coherent_trials / n_trials


	def plackett_luce_utilities(
	    v_a: float,
	    v_b: float,
	    v_c: float = 0.0
	) -> dict:
	    """
	    Plackett-Luce model for triad comparisons.

	    Calculates probability of each ranking permutation given utilities.
	    Identifiability constraint: V_C = 0 (reference point).

	    A ranking is built by sequential choice: the first item is picked from
	    all three options, the second from the remaining two, e.g.
	    P(A > B > C) = exp(V_A) / (exp(V_A) + exp(V_B) + exp(V_C))
	                   * exp(V_B) / (exp(V_B) + exp(V_C)).
	    Each stage is evaluated as a max-shifted softmax, so large utilities
	    do not overflow exp() into nan.

	    Args:
	        v_a: Utility of option A
	        v_b: Utility of option B
	        v_c: Utility of option C (default 0 for identifiability)

	    Returns:
	        Dictionary of ranking probabilities, keyed "A > B > C",
	        "A > C > B", "B > A > C", "B > C > A", "C > A > B", "C > B > A"
	        (in that order)
	    """
	    from itertools import permutations

	    utilities = {"A": float(v_a), "B": float(v_b), "C": float(v_c)}

	    def _choice_probability(chosen, pool):
	        # Softmax probability of picking `chosen` out of `pool`. Shifting
	        # by the pool maximum keeps every exponent <= 0, so the
	        # denominator is always >= 1.
	        values = np.array([utilities[name] for name in pool])
	        weights = np.exp(values - values.max())
	        return weights[pool.index(chosen)] / weights.sum()

	    rankings = {}
	    # permutations("ABC") yields ABC, ACB, BAC, BCA, CAB, CBA.
	    for order in permutations("ABC"):
	        first_pick = _choice_probability(order[0], order)
	        second_pick = _choice_probability(order[1], order[1:])
	        rankings[" > ".join(order)] = float(first_pick * second_pick)

	    return rankings


	# ============================================================================
	# ROBUSTNESS GAUNTLET - CORE VISUALIZATION
	# ============================================================================

	def run_robustness_gauntlet(
	    base_rate: float,
	    n_trials: int,
	    n_runs: int,
	    max_noise: float
	) -> Tuple[plt.Figure, str]:
	    """
	    Run the full Robustness Gauntlet: sweep noise levels and measure divergence.

	    THE KEY FINDING: Exchange Rate stays stable (FLATLINE) while
	    Coherence decays (FRAGILE). Stable parameters ≠ reliable predictions.

	    Args:
	        base_rate: True underlying exchange rate
	        n_trials: Number of comparison trials per run
	        n_runs: Number of independent runs per noise level (must be >= 1)
	        max_noise: Upper end of the noise sweep (20 levels from 0.01)

	    Returns:
	        Tuple of (matplotlib figure, Markdown summary text)

	    Raises:
	        ValueError: If n_runs is smaller than 1.
	    """
	    # Counts feed range() and NumPy sample sizes, which reject floats.
	    n_trials = int(n_trials)
	    n_runs = int(n_runs)
	    if n_runs < 1:
	        raise ValueError("n_runs must be at least 1")

	    noise_levels = np.linspace(0.01, max_noise, 20)

	    # Storage for results
	    rate_means = []
	    rate_stds = []
	    coherence_means = []
	    coherence_stds = []

	    for noise in noise_levels:
	        # Each call is seeded explicitly, so the two series are independent
	        # of the order in which they are computed.
	        run_rates = [
	            estimate_exchange_rate(n_trials, base_rate, noise, seed=run)[0]
	            for run in range(n_runs)
	        ]
	        run_coherences = [
	            calculate_coherence(n_trials, base_rate, noise, seed=run + 1000)
	            for run in range(n_runs)
	        ]

	        rate_means.append(np.mean(run_rates))
	        rate_stds.append(np.std(run_rates))
	        coherence_means.append(np.mean(run_coherences))
	        coherence_stds.append(np.std(run_coherences))

	    # Convert to numpy
	    rate_means = np.array(rate_means)
	    rate_stds = np.array(rate_stds)
	    coherence_means = np.array(coherence_means)
	    coherence_stds = np.array(coherence_stds)

	    # Create the figure directly instead of via plt.subplots(): pyplot keeps
	    # a global reference to every figure it creates, which accumulates in a
	    # long-running app unless each figure is explicitly closed.
	    fig = plt.Figure(figsize=(10, 6))
	    ax1 = fig.subplots()

	    # Primary axis: Exchange Rate
	    color1 = '#2563eb'  # Blue
	    ax1.set_xlabel('Noise Level (Temperature T)', fontsize=12)
	    ax1.set_ylabel('Exchange Rate (R)', color=color1, fontsize=12)
	    ax1.plot(noise_levels, rate_means, color=color1, linewidth=2, label='Exchange Rate')
	    ax1.fill_between(noise_levels,
	                     rate_means - rate_stds,
	                     rate_means + rate_stds,
	                     color=color1, alpha=0.2)
	    ax1.tick_params(axis='y', labelcolor=color1)
	    ax1.axhline(y=base_rate, color=color1, linestyle='--', alpha=0.5, label=f'True Rate ({base_rate})')

	    # Secondary axis: Coherence
	    ax2 = ax1.twinx()
	    color2 = '#dc2626'  # Red
	    ax2.set_ylabel('Coherence (κ)', color=color2, fontsize=12)
	    ax2.plot(noise_levels, coherence_means, color=color2, linewidth=2, label='Coherence')
	    ax2.fill_between(noise_levels,
	                     coherence_means - coherence_stds,
	                     coherence_means + coherence_stds,
	                     color=color2, alpha=0.2)
	    ax2.tick_params(axis='y', labelcolor=color2)
	    ax2.set_ylim(0, 1.1)

	    # Title and legend
	    fig.suptitle('The Robustness Gauntlet: Rate Stability vs Coherence Decay', fontsize=14, fontweight='bold')

	    # Combined legend
	    lines1, labels1 = ax1.get_legend_handles_labels()
	    lines2, labels2 = ax2.get_legend_handles_labels()
	    ax1.legend(lines1 + lines2, labels1 + labels2, loc='lower left')

	    # Lay out this figure specifically, not pyplot's "current figure".
	    fig.tight_layout()

	    # Summary statistics
	    final_rate = rate_means[-1]
	    final_coherence = coherence_means[-1]
	    # Relative drift is measured against |base_rate| so a negative true rate
	    # does not yield a negative percentage; it is undefined for a zero rate
	    # and reported as infinite there.
	    rate_scale = abs(base_rate)
	    if rate_scale:
	        rate_drift = abs(final_rate - base_rate) / rate_scale * 100
	    else:
	        rate_drift = float("inf")
	    coherence_drop = (1 - final_coherence) * 100

	    summary = f"""
	## 📊 Gauntlet Results

	### Exchange Rate (Blue Line)
	- Final Rate: {final_rate:.4f} (drift: {rate_drift:.1f}% from true)
	- Status: {'✅ STABLE (Flatline)' if rate_drift < 10 else '⚠️ Drifting'}

	### Coherence (Red Line)
	- Final Coherence: {final_coherence:.2%}
	- Decay: {coherence_drop:.1f}% from perfect
	- Status: {'✅ Stable' if final_coherence > 0.8 else '⚠️ FRAGILE' if final_coherence > 0.6 else '🔴 CRITICAL'}

	### 🎯 The Divergence Finding
	"Stable parameters ≠ reliable predictions"

	The Exchange Rate remains nearly constant (FLATLINE) while Coherence
	decays significantly. This demonstrates that parameter-level stability
	metrics can mask predictive fragility.
	"""

	    return fig, summary


	def run_triad_analysis(v_a: float, v_b: float) -> Tuple[plt.Figure, str]:
	    """
	    Run Plackett-Luce triad analysis with given utilities.

	    Args:
	        v_a: Utility of option A
	        v_b: Utility of option B (option C is fixed at 0)

	    Returns:
	        Tuple of (horizontal bar chart of the six ranking probabilities,
	        Markdown summary text)
	    """
	    v_c = 0.0  # Identifiability constraint

	    rankings = plackett_luce_utilities(v_a, v_b, v_c)

	    # Build the figure directly instead of via plt.subplots(): pyplot keeps
	    # a global reference to every figure it creates, which accumulates in a
	    # long-running app unless each figure is explicitly closed.
	    fig = plt.Figure(figsize=(10, 5))
	    ax = fig.subplots()

	    names = list(rankings.keys())
	    probs = list(rankings.values())
	    colors = ['#2563eb', '#3b82f6', '#60a5fa', '#93c5fd', '#bfdbfe', '#dbeafe']

	    bars = ax.barh(names, probs, color=colors)
	    ax.set_xlabel('Probability', fontsize=12)
	    ax.set_title(f'Plackett-Luce Ranking Probabilities\n(V_A={v_a:.2f}, V_B={v_b:.2f}, V_C=0.00)',
	                 fontsize=14, fontweight='bold')
	    ax.set_xlim(0, 1)

	    # Add value labels just to the right of each bar
	    for bar, prob in zip(bars, probs):
	        ax.text(prob + 0.02, bar.get_y() + bar.get_height()/2,
	                f'{prob:.3f}', va='center', fontsize=10)

	    # Lay out this figure specifically, not pyplot's "current figure".
	    fig.tight_layout()

	    # Find most likely ranking
	    most_likely = max(rankings, key=rankings.get)

	    summary = f"""
	## 📐 Triad Mode Analysis (Plackett-Luce)

	### Utility Parameters
	- V_A: {v_a:.2f}
	- V_B: {v_b:.2f}
	- V_C: 0.00 (reference)

	### Most Likely Ranking
	{most_likely} with probability {rankings[most_likely]:.1%}

	### Interpretation
	- Higher utility → more likely to be ranked first
	- V_C = 0 is the identifiability constraint (anchor point)
	- Probabilities sum to 1.0 across all permutations
	"""

	    return fig, summary


	# ============================================================================
	# GRADIO INTERFACE
	# ============================================================================

	# Markdown copy for the page, kept apart from the layout code below.
	_INTRO_MD = """
	# ⚖️ The Flatline Truth: Quantifying Coherence in LLM Preferences

	A robust auditing framework bridging viral claims to verifiable science.

	By Olofson & Grok (2025) • CC-BY-4.0

	---

	## The Crisis of Preference Stability

	RLHF builds the moral compass of modern AI. But conventional metrics overlook systemic robustness.
	This tool stress-tests emergent preferences using math, not ideology.

	Key Finding: Exchange Rate stays FLAT while Coherence DECAYS.
	Stable parameters ≠ reliable predictions.
	"""

	_GAUNTLET_MD = """
	### The Gauntlet: Temperature Sweep with Noise Injection

	Inject Gaussian noise across multiple runs to measure the divergence between:
	- Exchange Rate (R): Parameter stability
	- Coherence (κ): Prediction consistency
	"""

	_TRIAD_MD = """
	### Plackett-Luce Model for Triad Comparisons

	Extend beyond binary to rank three options. The model calculates
	probability of each ranking permutation given utility values.

	Identifiability Constraint: V_C = 0 (reference anchor)
	"""

	_METHODOLOGY_MD = """
	## Statistical Framework

	### Binary Mode: Bradley-Terry Model

	Estimates the Exchange Rate (R) between two competing outcomes:

	```
	P(Y > X) = exp(λ) / (1 + exp(λ))
	where λ = R - βN
	```

	- R: Exchange rate parameter
	- β: Noise sensitivity coefficient
	- N: Noise level (temperature T)

	Maximum Likelihood Estimation recovers R from observed preferences.

	---

	### Triad Mode: Plackett-Luce Model

	Generalizes to multi-set ranking:

	```
	P(A > B > C) = (exp(V_A) / Σexp(V)) × (exp(V_B) / (exp(V_B) + exp(V_C)))
	```

	- V_i: Utility of option i
	- Identifiability: V_C = 0 (anchor)

	---

	### The Robustness Gauntlet

	1. Temperature Sweep: Inject Gaussian noise N(0, T²)
	2. Multiple Runs: 10+ independent trials per noise level
	3. Dual Metrics:
	- Rate Stability: μ ± 1σ of estimated R
	- Coherence Decay: κ = fraction maintaining correct preference

	The Divergence: When R flatlines but κ decays, the model has
	stable parameters but fragile predictions.

	---

	### References

	- Bradley, R.A. & Terry, M.E. (1952). Rank Analysis of Incomplete Block Designs
	- Plackett, R.L. (1975). The Analysis of Permutations
	- Arctotherium42 phenomenon - Community documentation of LLM preference volatility
	"""

	_FOOTER_MD = """
	---

	Built with 🔮 by Quantum Pi Forge • T=∞ = T=0
	"""

	_app_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="slate")

	with gr.Blocks(title="LLM Coherence Auditing Framework", theme=_app_theme) as demo:

	    gr.Markdown(_INTRO_MD)

	    with gr.Tabs():
	        # Tab 1: noise sweep comparing rate stability with coherence decay.
	        with gr.TabItem("🎯 Robustness Gauntlet"):
	            gr.Markdown(_GAUNTLET_MD)

	            with gr.Row():
	                # Left column: simulation controls.
	                with gr.Column(scale=1):
	                    base_rate_slider = gr.Slider(
	                        label="Base Exchange Rate (True R)",
	                        minimum=0.5,
	                        maximum=2.0,
	                        value=1.0,
	                        step=0.1,
	                    )
	                    n_trials_slider = gr.Slider(
	                        label="Trials per Run",
	                        minimum=50,
	                        maximum=500,
	                        value=100,
	                        step=50,
	                    )
	                    n_runs_slider = gr.Slider(
	                        label="Independent Runs",
	                        minimum=5,
	                        maximum=50,
	                        value=10,
	                        step=5,
	                    )
	                    max_noise_slider = gr.Slider(
	                        label="Maximum Noise Level (T)",
	                        minimum=0.5,
	                        maximum=3.0,
	                        value=1.5,
	                        step=0.1,
	                    )
	                    run_gauntlet_btn = gr.Button("🚀 Run Gauntlet", variant="primary")

	                # Right column: plot and textual summary.
	                with gr.Column(scale=2):
	                    gauntlet_plot = gr.Plot(label="Robustness Gauntlet Results")
	                    gauntlet_summary = gr.Markdown()

	            # Slider order matches run_robustness_gauntlet's parameters.
	            run_gauntlet_btn.click(
	                fn=run_robustness_gauntlet,
	                inputs=[
	                    base_rate_slider,
	                    n_trials_slider,
	                    n_runs_slider,
	                    max_noise_slider,
	                ],
	                outputs=[gauntlet_plot, gauntlet_summary],
	            )

	        # Tab 2: three-option ranking probabilities.
	        with gr.TabItem("📐 Triad Mode (Plackett-Luce)"):
	            gr.Markdown(_TRIAD_MD)

	            with gr.Row():
	                # Left column: utility controls (V_C is not adjustable).
	                with gr.Column(scale=1):
	                    v_a_slider = gr.Slider(
	                        label="Utility V_A",
	                        minimum=-2.0,
	                        maximum=2.0,
	                        value=1.0,
	                        step=0.1,
	                    )
	                    v_b_slider = gr.Slider(
	                        label="Utility V_B",
	                        minimum=-2.0,
	                        maximum=2.0,
	                        value=0.5,
	                        step=0.1,
	                    )
	                    gr.Markdown("V_C = 0 (fixed reference)")
	                    run_triad_btn = gr.Button("📊 Analyze Triad", variant="primary")

	                # Right column: bar chart and textual summary.
	                with gr.Column(scale=2):
	                    triad_plot = gr.Plot(label="Plackett-Luce Rankings")
	                    triad_summary = gr.Markdown()

	            run_triad_btn.click(
	                fn=run_triad_analysis,
	                inputs=[v_a_slider, v_b_slider],
	                outputs=[triad_plot, triad_summary],
	            )

	        # Tab 3: static description of the models and procedure.
	        with gr.TabItem("📚 Methodology"):
	            gr.Markdown(_METHODOLOGY_MD)

	    gr.Markdown(_FOOTER_MD)


	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()