Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| ╔═══════════════════════════════════════════════════════════════════════════════╗ | |
| ║ LLM COHERENCE AUDITING FRAMEWORK - GRADIO SPACE ║ | |
| ║ The Flatline Truth Dashboard ║ | |
| ║ Quantum Pi Forge ║ | |
| ╠═══════════════════════════════════════════════════════════════════════════════╣ | |
| ║ Interactive Gradio app for auditing LLM preference stability under noise. ║ | |
| ║ Authors: Olofson & Grok (2025). Bradley-Terry + Plackett-Luce models. ║ | |
| ║ License: CC-BY-4.0 ║ | |
| ╚═══════════════════════════════════════════════════════════════════════════════╝ | |
| """ | |
| import gradio as gr | |
| import numpy as np | |
| import matplotlib.pyplot as plt | |
| from typing import Tuple | |
| import warnings | |
| warnings.filterwarnings('ignore') | |
| # ============================================================================ | |
| # CORE STATISTICAL MODELS | |
| # ============================================================================ | |
| def bradley_terry_probability(lambda_param: float) -> float: | |
| """ | |
| Bradley-Terry model: P(Y > X) = exp(λ) / (1 + exp(λ)) | |
| Args: | |
| lambda_param: Log-odds parameter (R - βN in the full model) | |
| Returns: | |
| Probability that Y is preferred over X | |
| """ | |
| return np.exp(lambda_param) / (1 + np.exp(lambda_param)) | |
| def estimate_exchange_rate( | |
| n_trials: int, | |
| base_rate: float, | |
| noise_std: float, | |
| seed: int = 42 | |
| ) -> Tuple[float, float]: | |
| """ | |
| Estimate Exchange Rate (R) via Maximum Likelihood Estimation | |
| with injected Gaussian noise. | |
| Args: | |
| n_trials: Number of comparison trials | |
| base_rate: True underlying exchange rate | |
| noise_std: Standard deviation of Gaussian noise (temperature T) | |
| seed: Random seed for reproducibility | |
| Returns: | |
| Tuple of (estimated_rate, standard_error) | |
| """ | |
| np.random.seed(seed) | |
| # Inject noise into each trial | |
| noisy_rates = base_rate + np.random.normal(0, noise_std, n_trials) | |
| # MLE estimate is the mean of observed rates | |
| estimated_rate = np.mean(noisy_rates) | |
| standard_error = np.std(noisy_rates) / np.sqrt(n_trials) | |
| return estimated_rate, standard_error | |
| def calculate_coherence( | |
| n_trials: int, | |
| base_rate: float, | |
| noise_std: float, | |
| seed: int = 42 | |
| ) -> float: | |
| """ | |
| Calculate Coherence (κ) - the consistency of predictions under noise. | |
| Coherence measures how often the same preference ranking is maintained | |
| across repeated noisy trials. | |
| Args: | |
| n_trials: Number of comparison trials | |
| base_rate: True underlying exchange rate | |
| noise_std: Standard deviation of Gaussian noise | |
| seed: Random seed | |
| Returns: | |
| Coherence score between 0 and 1 | |
| """ | |
| np.random.seed(seed) | |
| # Generate noisy probabilities | |
| noisy_rates = base_rate + np.random.normal(0, noise_std, n_trials) | |
| probs = [bradley_terry_probability(r) for r in noisy_rates] | |
| # Coherence: fraction of trials where preference > 0.5 matches base preference | |
| base_preference = bradley_terry_probability(base_rate) > 0.5 | |
| coherent_trials = sum((p > 0.5) == base_preference for p in probs) | |
| return coherent_trials / n_trials | |
| def plackett_luce_utilities( | |
| v_a: float, | |
| v_b: float, | |
| v_c: float = 0.0 | |
| ) -> dict: | |
| """ | |
| Plackett-Luce model for triad comparisons. | |
| Calculates probability of each ranking permutation given utilities. | |
| Identifiability constraint: V_C = 0 (reference point). | |
| Args: | |
| v_a: Utility of option A | |
| v_b: Utility of option B | |
| v_c: Utility of option C (default 0 for identifiability) | |
| Returns: | |
| Dictionary of ranking probabilities | |
| """ | |
| exp_a, exp_b, exp_c = np.exp(v_a), np.exp(v_b), np.exp(v_c) | |
| total = exp_a + exp_b + exp_c | |
| # P(A > B > C) = (exp_a/total) * (exp_b/(exp_b + exp_c)) | |
| rankings = { | |
| "A > B > C": (exp_a / total) * (exp_b / (exp_b + exp_c)), | |
| "A > C > B": (exp_a / total) * (exp_c / (exp_b + exp_c)), | |
| "B > A > C": (exp_b / total) * (exp_a / (exp_a + exp_c)), | |
| "B > C > A": (exp_b / total) * (exp_c / (exp_a + exp_c)), | |
| "C > A > B": (exp_c / total) * (exp_a / (exp_a + exp_b)), | |
| "C > B > A": (exp_c / total) * (exp_b / (exp_a + exp_b)), | |
| } | |
| return rankings | |
| # ============================================================================ | |
| # ROBUSTNESS GAUNTLET - CORE VISUALIZATION | |
| # ============================================================================ | |
| def run_robustness_gauntlet( | |
| base_rate: float, | |
| n_trials: int, | |
| n_runs: int, | |
| max_noise: float | |
| ) -> Tuple[plt.Figure, str]: | |
| """ | |
| Run the full Robustness Gauntlet: sweep noise levels and measure divergence. | |
| THE KEY FINDING: Exchange Rate stays stable (FLATLINE) while | |
| Coherence decays (FRAGILE). Stable parameters ≠ reliable predictions. | |
| """ | |
| noise_levels = np.linspace(0.01, max_noise, 20) | |
| # Storage for results | |
| rate_means = [] | |
| rate_stds = [] | |
| coherence_means = [] | |
| coherence_stds = [] | |
| for noise in noise_levels: | |
| run_rates = [] | |
| run_coherences = [] | |
| for run in range(n_runs): | |
| rate, _ = estimate_exchange_rate(n_trials, base_rate, noise, seed=run) | |
| coherence = calculate_coherence(n_trials, base_rate, noise, seed=run + 1000) | |
| run_rates.append(rate) | |
| run_coherences.append(coherence) | |
| rate_means.append(np.mean(run_rates)) | |
| rate_stds.append(np.std(run_rates)) | |
| coherence_means.append(np.mean(run_coherences)) | |
| coherence_stds.append(np.std(run_coherences)) | |
| # Convert to numpy | |
| rate_means = np.array(rate_means) | |
| rate_stds = np.array(rate_stds) | |
| coherence_means = np.array(coherence_means) | |
| coherence_stds = np.array(coherence_stds) | |
| # Create figure | |
| fig, ax1 = plt.subplots(figsize=(10, 6)) | |
| # Primary axis: Exchange Rate | |
| color1 = '#2563eb' # Blue | |
| ax1.set_xlabel('Noise Level (Temperature T)', fontsize=12) | |
| ax1.set_ylabel('Exchange Rate (R)', color=color1, fontsize=12) | |
| ax1.plot(noise_levels, rate_means, color=color1, linewidth=2, label='Exchange Rate') | |
| ax1.fill_between(noise_levels, | |
| rate_means - rate_stds, | |
| rate_means + rate_stds, | |
| color=color1, alpha=0.2) | |
| ax1.tick_params(axis='y', labelcolor=color1) | |
| ax1.axhline(y=base_rate, color=color1, linestyle='--', alpha=0.5, label=f'True Rate ({base_rate})') | |
| # Secondary axis: Coherence | |
| ax2 = ax1.twinx() | |
| color2 = '#dc2626' # Red | |
| ax2.set_ylabel('Coherence (κ)', color=color2, fontsize=12) | |
| ax2.plot(noise_levels, coherence_means, color=color2, linewidth=2, label='Coherence') | |
| ax2.fill_between(noise_levels, | |
| coherence_means - coherence_stds, | |
| coherence_means + coherence_stds, | |
| color=color2, alpha=0.2) | |
| ax2.tick_params(axis='y', labelcolor=color2) | |
| ax2.set_ylim(0, 1.1) | |
| # Title and legend | |
| fig.suptitle('The Robustness Gauntlet: Rate Stability vs Coherence Decay', fontsize=14, fontweight='bold') | |
| # Combined legend | |
| lines1, labels1 = ax1.get_legend_handles_labels() | |
| lines2, labels2 = ax2.get_legend_handles_labels() | |
| ax1.legend(lines1 + lines2, labels1 + labels2, loc='lower left') | |
| plt.tight_layout() | |
| # Summary statistics | |
| final_rate = rate_means[-1] | |
| final_coherence = coherence_means[-1] | |
| rate_drift = abs(final_rate - base_rate) / base_rate * 100 | |
| coherence_drop = (1 - final_coherence) * 100 | |
| summary = f""" | |
| ## 📊 Gauntlet Results | |
| ### Exchange Rate (Blue Line) | |
| - **Final Rate**: {final_rate:.4f} (drift: {rate_drift:.1f}% from true) | |
| - **Status**: {'✅ STABLE (Flatline)' if rate_drift < 10 else '⚠️ Drifting'} | |
| ### Coherence (Red Line) | |
| - **Final Coherence**: {final_coherence:.2%} | |
| - **Decay**: {coherence_drop:.1f}% from perfect | |
| - **Status**: {'✅ Stable' if final_coherence > 0.8 else '⚠️ FRAGILE' if final_coherence > 0.6 else '🔴 CRITICAL'} | |
| ### 🎯 The Divergence Finding | |
| **"Stable parameters ≠ reliable predictions"** | |
| The Exchange Rate remains nearly constant (FLATLINE) while Coherence | |
| decays significantly. This demonstrates that parameter-level stability | |
| metrics can mask predictive fragility. | |
| """ | |
| return fig, summary | |
| def run_triad_analysis(v_a: float, v_b: float) -> Tuple[plt.Figure, str]: | |
| """ | |
| Run Plackett-Luce triad analysis with given utilities. | |
| """ | |
| v_c = 0.0 # Identifiability constraint | |
| rankings = plackett_luce_utilities(v_a, v_b, v_c) | |
| # Create bar chart | |
| fig, ax = plt.subplots(figsize=(10, 5)) | |
| names = list(rankings.keys()) | |
| probs = list(rankings.values()) | |
| colors = ['#2563eb', '#3b82f6', '#60a5fa', '#93c5fd', '#bfdbfe', '#dbeafe'] | |
| bars = ax.barh(names, probs, color=colors) | |
| ax.set_xlabel('Probability', fontsize=12) | |
| ax.set_title(f'Plackett-Luce Ranking Probabilities\n(V_A={v_a:.2f}, V_B={v_b:.2f}, V_C=0.00)', | |
| fontsize=14, fontweight='bold') | |
| ax.set_xlim(0, 1) | |
| # Add value labels | |
| for bar, prob in zip(bars, probs): | |
| ax.text(prob + 0.02, bar.get_y() + bar.get_height()/2, | |
| f'{prob:.3f}', va='center', fontsize=10) | |
| plt.tight_layout() | |
| # Find most likely ranking | |
| most_likely = max(rankings, key=rankings.get) | |
| summary = f""" | |
| ## 📐 Triad Mode Analysis (Plackett-Luce) | |
| ### Utility Parameters | |
| - **V_A**: {v_a:.2f} | |
| - **V_B**: {v_b:.2f} | |
| - **V_C**: 0.00 (reference) | |
| ### Most Likely Ranking | |
| **{most_likely}** with probability **{rankings[most_likely]:.1%}** | |
| ### Interpretation | |
| - Higher utility → more likely to be ranked first | |
| - V_C = 0 is the identifiability constraint (anchor point) | |
| - Probabilities sum to 1.0 across all permutations | |
| """ | |
| return fig, summary | |
| # ============================================================================ | |
| # GRADIO INTERFACE | |
| # ============================================================================ | |
| with gr.Blocks( | |
| title="LLM Coherence Auditing Framework", | |
| theme=gr.themes.Soft(primary_hue="blue", secondary_hue="slate") | |
| ) as demo: | |
| gr.Markdown(""" | |
| # ⚖️ The Flatline Truth: Quantifying Coherence in LLM Preferences | |
| **A robust auditing framework bridging viral claims to verifiable science.** | |
| *By Olofson & Grok (2025) • CC-BY-4.0* | |
| --- | |
| ## The Crisis of Preference Stability | |
| RLHF builds the moral compass of modern AI. But conventional metrics overlook systemic robustness. | |
| This tool stress-tests emergent preferences using math, not ideology. | |
| **Key Finding**: Exchange Rate stays FLAT while Coherence DECAYS. | |
| *Stable parameters ≠ reliable predictions.* | |
| """) | |
| with gr.Tabs(): | |
| # Tab 1: Robustness Gauntlet | |
| with gr.TabItem("🎯 Robustness Gauntlet"): | |
| gr.Markdown(""" | |
| ### The Gauntlet: Temperature Sweep with Noise Injection | |
| Inject Gaussian noise across multiple runs to measure the divergence between: | |
| - **Exchange Rate (R)**: Parameter stability | |
| - **Coherence (κ)**: Prediction consistency | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| base_rate_slider = gr.Slider( | |
| minimum=0.5, maximum=2.0, value=1.0, step=0.1, | |
| label="Base Exchange Rate (True R)" | |
| ) | |
| n_trials_slider = gr.Slider( | |
| minimum=50, maximum=500, value=100, step=50, | |
| label="Trials per Run" | |
| ) | |
| n_runs_slider = gr.Slider( | |
| minimum=5, maximum=50, value=10, step=5, | |
| label="Independent Runs" | |
| ) | |
| max_noise_slider = gr.Slider( | |
| minimum=0.5, maximum=3.0, value=1.5, step=0.1, | |
| label="Maximum Noise Level (T)" | |
| ) | |
| run_gauntlet_btn = gr.Button("🚀 Run Gauntlet", variant="primary") | |
| with gr.Column(scale=2): | |
| gauntlet_plot = gr.Plot(label="Robustness Gauntlet Results") | |
| gauntlet_summary = gr.Markdown() | |
| run_gauntlet_btn.click( | |
| fn=run_robustness_gauntlet, | |
| inputs=[base_rate_slider, n_trials_slider, n_runs_slider, max_noise_slider], | |
| outputs=[gauntlet_plot, gauntlet_summary] | |
| ) | |
| # Tab 2: Triad Mode (Plackett-Luce) | |
| with gr.TabItem("📐 Triad Mode (Plackett-Luce)"): | |
| gr.Markdown(""" | |
| ### Plackett-Luce Model for Triad Comparisons | |
| Extend beyond binary to rank three options. The model calculates | |
| probability of each ranking permutation given utility values. | |
| **Identifiability Constraint**: V_C = 0 (reference anchor) | |
| """) | |
| with gr.Row(): | |
| with gr.Column(scale=1): | |
| v_a_slider = gr.Slider( | |
| minimum=-2.0, maximum=2.0, value=1.0, step=0.1, | |
| label="Utility V_A" | |
| ) | |
| v_b_slider = gr.Slider( | |
| minimum=-2.0, maximum=2.0, value=0.5, step=0.1, | |
| label="Utility V_B" | |
| ) | |
| gr.Markdown("*V_C = 0 (fixed reference)*") | |
| run_triad_btn = gr.Button("📊 Analyze Triad", variant="primary") | |
| with gr.Column(scale=2): | |
| triad_plot = gr.Plot(label="Plackett-Luce Rankings") | |
| triad_summary = gr.Markdown() | |
| run_triad_btn.click( | |
| fn=run_triad_analysis, | |
| inputs=[v_a_slider, v_b_slider], | |
| outputs=[triad_plot, triad_summary] | |
| ) | |
| # Tab 3: Methodology | |
| with gr.TabItem("📚 Methodology"): | |
| gr.Markdown(""" | |
| ## Statistical Framework | |
| ### Binary Mode: Bradley-Terry Model | |
| Estimates the **Exchange Rate (R)** between two competing outcomes: | |
| ``` | |
| P(Y > X) = exp(λ) / (1 + exp(λ)) | |
| where λ = R - βN | |
| ``` | |
| - **R**: Exchange rate parameter | |
| - **β**: Noise sensitivity coefficient | |
| - **N**: Noise level (temperature T) | |
| **Maximum Likelihood Estimation** recovers R from observed preferences. | |
| --- | |
| ### Triad Mode: Plackett-Luce Model | |
| Generalizes to **multi-set ranking**: | |
| ``` | |
| P(A > B > C) = (exp(V_A) / Σexp(V)) × (exp(V_B) / (exp(V_B) + exp(V_C))) | |
| ``` | |
| - **V_i**: Utility of option i | |
| - **Identifiability**: V_C = 0 (anchor) | |
| --- | |
| ### The Robustness Gauntlet | |
| 1. **Temperature Sweep**: Inject Gaussian noise N(0, T²) | |
| 2. **Multiple Runs**: 10+ independent trials per noise level | |
| 3. **Dual Metrics**: | |
| - Rate Stability: μ ± 1σ of estimated R | |
| - Coherence Decay: κ = fraction maintaining correct preference | |
| **The Divergence**: When R flatlines but κ decays, the model has | |
| *stable parameters but fragile predictions*. | |
| --- | |
| ### References | |
| - Bradley, R.A. & Terry, M.E. (1952). Rank Analysis of Incomplete Block Designs | |
| - Plackett, R.L. (1975). The Analysis of Permutations | |
| - Arctotherium42 phenomenon - Community documentation of LLM preference volatility | |
| """) | |
| gr.Markdown(""" | |
| --- | |
| *Built with 🔮 by Quantum Pi Forge • T=∞ = T=0* | |
| """) | |
| if __name__ == "__main__": | |
| demo.launch() | |