Spaces:

vkapoor
/

Entra

Sleeping

App Files Files Community

vkapoor committed on Dec 27, 2025

Commit

8e1b079

1 Parent(s): d9a6f3b

add application file

Browse files

Files changed (1) hide show

app.py +460 -0

app.py ADDED Viewed

	@@ -0,0 +1,460 @@

+"""
+Gradio App for Entropy-Conserving Transformations
+This app demonstrates how divergence-free vector fields can transform
+arbitrary distributions towards Gaussian form while conserving entropy.
+"""
+import gradio as gr
+import matplotlib
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from entra import DataFrameTransformer
+matplotlib.use("Agg")
def generate_uniform_data(n_per_dim: int = 20, dimensions: int = 2) -> pd.DataFrame:
    """Generate points on a regular grid spanning [-10, 10] along every axis.

    Args:
        n_per_dim: Number of grid points per axis. Integral floats and numeric
            strings (as UI widgets may deliver) are converted with ``int``.
        dimensions: Number of axes. 1, 2 and 3 are supported and map to the
            columns ``x``, ``y`` and ``z`` in that order.

    Returns:
        A DataFrame with ``n_per_dim ** dimensions`` rows and one column per
        axis, ordered exactly as ``np.meshgrid(...)`` followed by ``ravel()``.

    Raises:
        ValueError: If ``dimensions`` is not 1, 2 or 3.
    """
    axis_names = ("x", "y", "z")
    n_per_dim = int(n_per_dim)
    dimensions = int(dimensions)
    if not 1 <= dimensions <= len(axis_names):
        # Previously every value other than 2 silently produced 3D data.
        raise ValueError(
            f"dimensions must be between 1 and {len(axis_names)}, got {dimensions}"
        )

    ticks = np.linspace(-10, 10, n_per_dim)
    # Same tick vector on every axis; meshgrid keeps its default "xy" indexing
    # so the row order matches the former hand-written 2D/3D branches.
    grids = np.meshgrid(*([ticks] * dimensions))
    return pd.DataFrame(
        {name: grid.ravel() for name, grid in zip(axis_names, grids)}
    )
def generate_sample_csv(n_per_dim: int, dimensions: int):
    """Generate a uniform grid, write it to a CSV file and summarise it.

    Args:
        n_per_dim: Number of grid points per axis, forwarded to
            ``generate_uniform_data``.
        dimensions: Number of axes, forwarded to ``generate_uniform_data``.

    Returns:
        A ``(csv_path, info_text, dataframe)`` tuple: the path of the written
        CSV, a human-readable summary with a 10-row preview, and the data.
    """
    import os
    import tempfile

    df = generate_uniform_data(n_per_dim, dimensions)

    # Write into a fresh directory per call: a single fixed path would be
    # shared by every session, so concurrent users could overwrite each
    # other's download. The file name itself is kept stable for the user.
    # NOTE: the directory is not removed here; it lives until the system
    # temp area is cleaned.
    out_dir = tempfile.mkdtemp(prefix="entra_")
    temp_path = os.path.join(out_dir, "generated_uniform_data.csv")
    df.to_csv(temp_path, index=False)

    n_points = len(df)
    cols = list(df.columns)
    preview = df.head(10).to_string()
    info = f"Generated {n_points} points with columns: {cols}\n\nPreview:\n{preview}"
    return temp_path, info, df
def load_csv_file(file):
    """Load an uploaded CSV file into a DataFrame.

    Args:
        file: ``None``, a filesystem path (``str`` or ``os.PathLike``), or an
            object that exposes the path as its ``.name`` attribute.

    Returns:
        A ``(path, info_text, dataframe)`` tuple. When nothing was uploaded or
        the file cannot be read as CSV, ``path`` and ``dataframe`` are ``None``
        and ``info_text`` carries the reason.
    """
    import os

    if file is None:
        return None, "No file uploaded", None

    # The upload may arrive as a plain path or as a file wrapper with `.name`;
    # accept both instead of assuming the wrapper.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    try:
        df = pd.read_csv(path)
    except (
        OSError,
        UnicodeDecodeError,
        pd.errors.EmptyDataError,
        pd.errors.ParserError,
    ) as exc:
        # Report through the info text, matching the "No file uploaded" path.
        return None, f"Error: could not read CSV file: {exc}", None

    n_points = len(df)
    cols = list(df.columns)
    preview = df.head(10).to_string()
    info = f"Loaded {n_points} points with columns: {cols}\n\nPreview:\n{preview}"
    return path, info, df
def run_transformation(
    df_state,
    columns_str: str,
    sigma: float,
    max_iterations: int,
):
    """Run the LM optimization and return results.

    Args:
        df_state: The currently loaded DataFrame, or ``None`` if no data has
            been loaded yet.
        columns_str: Comma-separated names of the columns to transform.
            Surrounding whitespace and empty entries (e.g. from a trailing
            comma) are ignored.
        sigma: Passed to ``DataFrameTransformer`` as ``sigma``.
        max_iterations: Passed to ``DataFrameTransformer`` as
            ``max_iterations`` after conversion to ``int``.

    Returns:
        A ``(scatter_figure, histogram_figure, history_figure, text)`` tuple.
        On a validation failure the three figures are ``None`` and ``text``
        holds an ``"Error: ..."`` message.
    """

    def _error(message):
        # All failures share the same shape: no figures, message in the text.
        return None, None, None, message

    if df_state is None:
        return _error("Error: No data loaded. Please upload or generate data first.")
    df = df_state

    # Parse columns, dropping blanks so "x, y," does not yield a '' column.
    columns = [name.strip() for name in (columns_str or "").split(",")]
    columns = [name for name in columns if name]
    if not columns:
        return _error(
            f"Error: No columns specified. Available: {list(df.columns)}"
        )

    # Validate columns exist
    missing = [c for c in columns if c not in df.columns]
    if missing:
        return _error(
            f"Error: Columns not found: {missing}. Available: {list(df.columns)}"
        )

    # Reject text/object columns up front with a readable message.
    non_numeric = [c for c in columns if not pd.api.types.is_numeric_dtype(df[c])]
    if non_numeric:
        return _error(f"Error: Columns must be numeric: {non_numeric}")

    # Create transformer (UI sliders may deliver floats, hence the int()).
    transformer = DataFrameTransformer(
        sigma=sigma,
        max_iterations=int(max_iterations),
        verbose=False,
    )
    # Run transformation
    df_transformed = transformer.fit_transform(df, columns=columns)
    # Get entropy comparison
    entropy = transformer.get_entropy_comparison(df, df_transformed)

    # Create plots
    fig_scatter = create_scatter_plot(df, df_transformed, columns)
    fig_hist = create_histogram_plot(df, df_transformed, columns)
    fig_history = create_history_plot(transformer.history_)
    # Create results text
    results_text = format_results(entropy, transformer.history_)
    return fig_scatter, fig_hist, fig_history, results_text
def create_scatter_plot(df_orig, df_trans, columns):
    """Create before/after scatter plot.

    Args:
        df_orig: DataFrame holding the original points.
        df_trans: DataFrame holding the transformed points.
        columns: Column names; the first two are used as the x and y axes.

    Returns:
        A matplotlib ``Figure`` with two side-by-side panels (original on the
        left, transformed on the right). With fewer than two columns both
        panels show an explanatory message instead of blank axes.
    """
    # Build the figure directly rather than through pyplot: pyplot keeps every
    # figure it creates alive until it is explicitly closed, which leaks
    # memory in a long-running server.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(12, 5))
    axes = fig.subplots(1, 2)

    if len(columns) >= 2:
        x_col, y_col = columns[0], columns[1]
        panels = (
            (axes[0], df_orig, "blue", "Original Distribution"),
            (axes[1], df_trans, "red", "Transformed (Towards Gaussian)"),
        )
        for ax, frame, color, title in panels:
            ax.scatter(frame[x_col], frame[y_col], c=color, alpha=0.5, s=10)
            ax.set_xlabel(x_col)
            ax.set_ylabel(y_col)
            ax.set_title(title)
            ax.set_aspect("equal")
            ax.grid(True, alpha=0.3)
    else:
        for ax in axes:
            ax.text(
                0.5,
                0.5,
                "Select at least two columns for a scatter plot",
                ha="center",
                va="center",
                transform=ax.transAxes,
            )
            ax.set_axis_off()

    fig.tight_layout()
    return fig
def create_histogram_plot(df_orig, df_trans, columns):
    """Create marginal histogram plots.

    Args:
        df_orig: DataFrame holding the original points.
        df_trans: DataFrame holding the transformed points.
        columns: Column names; at most the first three are plotted.

    Returns:
        A matplotlib ``Figure`` with one row per plotted column: the original
        marginal on the left and the transformed marginal on the right, the
        latter overlaid with a Gaussian of matching mean and standard
        deviation when that standard deviation is positive.
    """
    # Build the figure directly rather than through pyplot: pyplot keeps every
    # figure it creates alive until it is explicitly closed, which leaks
    # memory in a long-running server.
    from matplotlib.figure import Figure

    shown = list(columns[:3])
    # A grid with zero rows cannot be created, so keep at least one row.
    n_rows = max(len(shown), 1)
    fig = Figure(figsize=(12, 4 * n_rows))
    # squeeze=False always yields a 2D axes array, even for a single row.
    axes = fig.subplots(n_rows, 2, squeeze=False)

    if not shown:
        for ax in axes.ravel():
            ax.set_axis_off()

    for (before_ax, after_ax), col in zip(axes, shown):
        # Original
        before_ax.hist(df_orig[col], bins=30, density=True, alpha=0.7, color="blue")
        before_ax.set_xlabel(col)
        before_ax.set_ylabel("Density")
        before_ax.set_title(f"Original {col} Marginal")

        # Transformed with Gaussian overlay
        values = df_trans[col]
        after_ax.hist(values, bins=30, density=True, alpha=0.7, color="red")
        mu = values.mean()
        std = values.std()
        # Skip the overlay for zero or NaN spread; the density is undefined
        # there and would otherwise be plotted as NaNs.
        if std > 0:
            x_range = np.linspace(values.min(), values.max(), 100)
            gaussian = (1 / (std * np.sqrt(2 * np.pi))) * np.exp(
                -0.5 * ((x_range - mu) / std) ** 2
            )
            after_ax.plot(x_range, gaussian, "k--", linewidth=2, label="Gaussian fit")
            after_ax.legend()
        after_ax.set_xlabel(col)
        after_ax.set_ylabel("Density")
        after_ax.set_title(f"Transformed {col} Marginal")

    fig.tight_layout()
    return fig
def create_history_plot(history):
    """Create optimization history plot.

    Args:
        history: Mapping with the sequences ``"iteration"``, ``"determinant"``
            and ``"gaussian_entropy"``.

    Returns:
        A matplotlib ``Figure`` with two panels: the determinant per iteration
        on a logarithmic y-axis, and the Gaussian entropy per iteration.
    """
    # Build the figure directly rather than through pyplot: pyplot keeps every
    # figure it creates alive until it is explicitly closed, which leaks
    # memory in a long-running server.
    from matplotlib.figure import Figure

    fig = Figure(figsize=(12, 4))
    det_ax, entropy_ax = fig.subplots(1, 2)
    iterations = history["iteration"]

    # Determinant
    det_ax.semilogy(iterations, history["determinant"], "b-o", markersize=4)
    det_ax.set_xlabel("Iteration")
    det_ax.set_ylabel("Covariance Determinant")
    det_ax.set_title("Determinant Minimization")
    det_ax.grid(True, alpha=0.3)

    # Gaussian entropy
    entropy_ax.plot(iterations, history["gaussian_entropy"], "r-o", markersize=4)
    entropy_ax.set_xlabel("Iteration")
    entropy_ax.set_ylabel("H(Gaussian)")
    entropy_ax.set_title(
        "Gaussian Entropy Bound\n(decreases because we start from uniform)"
    )
    entropy_ax.grid(True, alpha=0.3)

    fig.tight_layout()
    return fig
def format_results(entropy, history):
    """Format results as text.

    Args:
        entropy: Mapping with ``"original"`` and ``"transformed"`` entries.
            Both provide ``"knn_entropy"`` and ``"determinant"``; the
            transformed entry also provides ``"gaussian_entropy"``.
        history: Mapping with the sequences ``"iteration"``, ``"determinant"``
            and ``"gaussian_entropy"``; may be empty.

    Returns:
        A multi-line report that starts and ends with a newline. Values that
        cannot be computed (reduction factor for a zero transformed
        determinant, final values of an empty history) are shown as ``n/a``.
    """
    before = entropy["original"]
    after = entropy["transformed"]

    # A zero transformed determinant would make the ratio a division by zero.
    if after["determinant"]:
        reduction = f"{before['determinant'] / after['determinant']:.2f}x"
    else:
        reduction = "n/a"

    # With no recorded iterations there is no last element to report.
    n_iterations = len(history["iteration"])
    if n_iterations:
        final_det = f"{history['determinant'][-1]:.6e}"
        final_entropy = f"{history['gaussian_entropy'][-1]:.6f}"
    else:
        final_det = "n/a"
        final_entropy = "n/a"

    knn_diff = abs(before["knn_entropy"] - after["knn_entropy"])

    lines = [
        "",
        "TRANSFORMATION RESULTS",
        "=" * 50,
        "Entropy Comparison (k-NN estimator):",
        f"  Original:    {before['knn_entropy']:.6f} nats",
        f"  Transformed: {after['knn_entropy']:.6f} nats",
        f"  Difference:  {knn_diff:.6f} nats",
        "  (k-NN entropy should remain ~constant for volume-preserving transformation)",
        "Gaussian Entropy of Transformed Data:",
        f"  H(Gaussian): {after['gaussian_entropy']:.6f} nats",
        "  (This is the entropy IF the transformed data were perfectly Gaussian)",
        "Covariance Determinant:",
        f"  Original:    {before['determinant']:.6e}",
        f"  Transformed: {after['determinant']:.6e}",
        f"  Reduction:   {reduction}",
        "Optimization:",
        f"  Iterations with improvement: {n_iterations}",
        f"  Final determinant: {final_det}",
        f"  Final H(Gaussian): {final_entropy}",
        "",
    ]
    return "\n".join(lines)
# Markdown explanation of the Levenberg-Marquardt algorithm, rendered verbatim
# in the "How LM Works" tab built by create_app().
LM_EXPLANATION = """
## How the Levenberg-Marquardt Algorithm Works
The **Levenberg-Marquardt (LM) algorithm** is used to minimize the covariance determinant. Unlike gradient descent, **LM has no learning rate** - here's why:
### The Key Insight
LM is designed for **least-squares problems** where you minimize a sum of squared residuals. Instead of taking steps proportional to the gradient (like gradient descent), LM solves a **local linear approximation** of the problem at each step.
### How It Works
1. **Compute the Jacobian** `J` - the matrix of partial derivatives of residuals with respect to parameters
2. **Solve the normal equations**:
   ```
   (J^T J + λI) δ = -J^T r
   ```
   where `r` is the residual vector and `λ` is a damping parameter
3. **The damping parameter λ replaces the learning rate**:
   - When `λ` is **large**: The step is small and in the gradient direction (like gradient descent with small learning rate)
   - When `λ` is **small**: The step approaches the Gauss-Newton step (a direct jump to the local minimum of the quadratic approximation)
4. **Adaptive adjustment**:
   - If a step **decreases** the objective: Accept it and **decrease λ** (take bigger steps)
   - If a step **increases** the objective: Reject it and **increase λ** (take smaller, safer steps)
### Why No Learning Rate?
The LM algorithm **automatically adapts** its step size through the damping parameter λ:
- It starts cautious (large λ, small steps)
- As it finds a good direction, it becomes more aggressive (small λ, large steps)
- If it overshoots, it backs off automatically
This makes LM much more robust than gradient descent - you don't need to tune a learning rate!
### In This Application
We minimize `log(det(Cov))` where `Cov` is the covariance matrix of the transformed points. The transformation is parameterized by coefficients of divergence-free basis functions, ensuring the transformation is **volume-preserving** and thus **entropy-conserving**.
"""
# Markdown summary of the theoretical background, rendered verbatim in the
# "Theory" tab built by create_app().
THEORY_EXPLANATION = """
## Theoretical Background
### Maximum Entropy Principle
A fundamental theorem states: **Among all distributions with a given covariance matrix, the Gaussian has maximum entropy.**
This means for any distribution with entropy `H₀` and covariance `Σ`:
- The Gaussian with the same covariance has entropy `H_Gaussian(Σ) ≥ H₀`
- Equality holds only when the distribution is Gaussian
### The Key Insight
If we apply a **volume-preserving transformation**:
1. The entropy stays fixed at `H₀` (entropy is conserved)
2. But the covariance changes
By **minimizing the covariance determinant** while preserving entropy:
- We reduce `H_Gaussian(Σ)` (the Gaussian entropy bound)
- When `H_Gaussian(Σ) = H₀`, the distribution must be Gaussian!
### Why Divergence-Free?
Divergence-free vector fields define **volume-preserving** transformations:
- The Jacobian determinant equals 1 everywhere
- Total probability volume is conserved
- **Entropy is conserved** under the transformation
This is the incompressibility condition from fluid dynamics: `∇·v = 0`
### The Operator
We construct divergence-free basis functions using Lowitzsch's operator:
**Ô = -I∇² + ∇∇ᵀ**
Applied to Gaussian RBFs, this produces matrix-valued functions where each column is a divergence-free vector field.
"""
def create_app() -> gr.Blocks:
    """Create the Gradio interface.

    Builds a ``gr.Blocks`` layout with three tabs ("Transform Data",
    "How LM Works", "Theory") and wires the three buttons to their handlers:
    generating sample data, loading an uploaded CSV, and running the
    transformation. The app is returned without being launched.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    with gr.Blocks(
        title="Entropy-Conserving Transformations", theme=gr.themes.Soft()
    ) as app:
        # Page header shown above the tabs.
        gr.Markdown(
            """
        # Entropy-Conserving Transformations Using Divergence-Free Vector Fields
        Transform arbitrary distributions towards Gaussian form while **conserving entropy**.
        This demo uses divergence-free basis functions to create volume-preserving transformations,
        then minimizes the covariance determinant using the Levenberg-Marquardt algorithm.
        """
        )
        # State to hold the dataframe; starts as None until data is generated
        # or uploaded, which run_transformation reports as "No data loaded".
        df_state = gr.State(None)
        with gr.Tabs():
            with gr.Tab("Transform Data"):
                with gr.Row():
                    # Left column: data source and transformation settings.
                    with gr.Column(scale=1):
                        gr.Markdown("### Step 1: Load or Generate Data")
                        with gr.Accordion("Option A: Upload CSV", open=True):
                            file_upload = gr.File(
                                label="Upload CSV file", file_types=[".csv"]
                            )
                            upload_btn = gr.Button("Load CSV", variant="secondary")
                        with gr.Accordion("Option B: Generate Uniform Data", open=True):
                            n_per_dim = gr.Slider(
                                minimum=5,
                                maximum=50,
                                value=20,
                                step=1,
                                label="Points per dimension",
                            )
                            dimensions = gr.Radio(
                                choices=[2, 3], value=2, label="Dimensions"
                            )
                            generate_btn = gr.Button(
                                "Generate Uniform Distribution",
                                variant="secondary",
                            )
                            download_file = gr.File(label="Download generated CSV")
                        # Read-only summary of whichever dataset was loaded last.
                        data_info = gr.Textbox(
                            label="Data Info", lines=8, interactive=False
                        )
                        gr.Markdown("### Step 2: Configure Transformation")
                        columns_input = gr.Textbox(
                            value="x, y",
                            label="Columns to transform (comma-separated)",
                        )
                        sigma = gr.Slider(
                            minimum=0.1,
                            maximum=20.0,
                            value=5.0,
                            step=0.1,
                            label="Sigma (RBF width)",
                        )
                        max_iterations = gr.Slider(
                            minimum=10,
                            maximum=500,
                            value=100,
                            step=10,
                            label="Max iterations",
                        )
                        transform_btn = gr.Button(
                            "Run Transformation", variant="primary", size="lg"
                        )
                    # Right column: textual report and the three result plots.
                    with gr.Column(scale=2):
                        gr.Markdown("### Results")
                        results_text = gr.Textbox(
                            label="Transformation Results",
                            lines=20,
                            interactive=False,
                        )
                        with gr.Row():
                            scatter_plot = gr.Plot(label="Before/After Scatter")
                        with gr.Row():
                            hist_plot = gr.Plot(label="Marginal Distributions")
                        with gr.Row():
                            history_plot = gr.Plot(label="Optimization History")
            with gr.Tab("How LM Works"):
                gr.Markdown(LM_EXPLANATION)
            with gr.Tab("Theory"):
                gr.Markdown(THEORY_EXPLANATION)
        # Event handlers
        def on_generate(n, dims):
            # Forwards all three results: CSV path, summary text, DataFrame.
            path, info, df = generate_sample_csv(n, dims)
            return path, info, df
        def on_upload(file):
            # The returned path is unpacked but not used; only the summary
            # text and the DataFrame are forwarded to the UI.
            path, info, df = load_csv_file(file)
            return info, df
        generate_btn.click(
            fn=on_generate,
            inputs=[n_per_dim, dimensions],
            outputs=[download_file, data_info, df_state],
        )
        upload_btn.click(
            fn=on_upload, inputs=[file_upload], outputs=[data_info, df_state]
        )
        # Output order matches run_transformation's return tuple.
        transform_btn.click(
            fn=run_transformation,
            inputs=[
                df_state,
                columns_input,
                sigma,
                max_iterations,
            ],
            outputs=[scatter_plot, hist_plot, history_plot, results_text],
        )
    return app
# Script entry point: build the interface and start the Gradio server.
if __name__ == "__main__":
    app = create_app()
    app.launch()