push app
Browse files
app.py
CHANGED
|
@@ -126,11 +126,12 @@ def run_transformation(
|
|
| 126 |
|
| 127 |
# Get entropy comparison
|
| 128 |
entropy = transformer.get_entropy_comparison(df, df_transformed)
|
|
|
|
| 129 |
|
| 130 |
# Create plots
|
| 131 |
fig_scatter = create_scatter_plot(df, df_transformed, columns)
|
| 132 |
fig_hist = create_histogram_plot(df, df_transformed, columns)
|
| 133 |
-
fig_history = create_history_plot(transformer.history_)
|
| 134 |
|
| 135 |
# Create results text
|
| 136 |
results_text = format_results(entropy, transformer.history_)
|
|
@@ -196,7 +197,7 @@ def create_histogram_plot(df_orig, df_trans, columns):
|
|
| 196 |
return fig
|
| 197 |
|
| 198 |
|
| 199 |
-
def create_history_plot(history):
|
| 200 |
"""Create optimization history plot."""
|
| 201 |
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
|
| 202 |
|
|
@@ -209,11 +210,18 @@ def create_history_plot(history):
|
|
| 209 |
|
| 210 |
# Gaussian entropy
|
| 211 |
axes[1].plot(history["iteration"], history["gaussian_entropy"], "r-o", markersize=4)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
axes[1].set_xlabel("Iteration")
|
| 213 |
axes[1].set_ylabel("H(Gaussian)")
|
| 214 |
-
axes[1].set_title(
|
| 215 |
-
"Gaussian Entropy Bound\n(decreases because we start from uniform)"
|
| 216 |
-
)
|
| 217 |
axes[1].grid(True, alpha=0.3)
|
| 218 |
|
| 219 |
plt.tight_layout()
|
|
@@ -225,22 +233,29 @@ def format_results(entropy, history):
|
|
| 225 |
det_reduction = (
|
| 226 |
entropy["original"]["determinant"] / entropy["transformed"]["determinant"]
|
| 227 |
)
|
|
|
|
|
|
|
|
|
|
| 228 |
|
| 229 |
text = f"""
|
| 230 |
TRANSFORMATION RESULTS
|
| 231 |
{'=' * 50}
|
| 232 |
|
| 233 |
-
Entropy
|
| 234 |
-
|
| 235 |
-
Transformed: {entropy['transformed']['knn_entropy']:.6f} nats
|
| 236 |
-
Difference: {abs(entropy['original']['knn_entropy'] - entropy['transformed']['knn_entropy']):.6f} nats
|
| 237 |
|
| 238 |
-
|
| 239 |
|
| 240 |
Gaussian Entropy of Transformed Data:
|
| 241 |
-
H(Gaussian)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 242 |
|
| 243 |
-
|
|
|
|
|
|
|
| 244 |
|
| 245 |
Covariance Determinant:
|
| 246 |
Original: {entropy['original']['determinant']:.6e}
|
|
|
|
| 126 |
|
| 127 |
# Get entropy comparison
|
| 128 |
entropy = transformer.get_entropy_comparison(df, df_transformed)
|
| 129 |
+
target_entropy = entropy["original"]["uniform_entropy"]
|
| 130 |
|
| 131 |
# Create plots
|
| 132 |
fig_scatter = create_scatter_plot(df, df_transformed, columns)
|
| 133 |
fig_hist = create_histogram_plot(df, df_transformed, columns)
|
| 134 |
+
fig_history = create_history_plot(transformer.history_, target_entropy=target_entropy)
|
| 135 |
|
| 136 |
# Create results text
|
| 137 |
results_text = format_results(entropy, transformer.history_)
|
|
|
|
| 197 |
return fig
|
| 198 |
|
| 199 |
|
| 200 |
+
def create_history_plot(history, target_entropy=None):
|
| 201 |
"""Create optimization history plot."""
|
| 202 |
fig, axes = plt.subplots(1, 2, figsize=(12, 4))
|
| 203 |
|
|
|
|
| 210 |
|
| 211 |
# Gaussian entropy
|
| 212 |
axes[1].plot(history["iteration"], history["gaussian_entropy"], "r-o", markersize=4)
|
| 213 |
+
if target_entropy is not None:
|
| 214 |
+
axes[1].axhline(
|
| 215 |
+
target_entropy,
|
| 216 |
+
color="green",
|
| 217 |
+
linestyle="--",
|
| 218 |
+
linewidth=2,
|
| 219 |
+
label=f"Target H(uniform) = {target_entropy:.4f}",
|
| 220 |
+
)
|
| 221 |
+
axes[1].legend()
|
| 222 |
axes[1].set_xlabel("Iteration")
|
| 223 |
axes[1].set_ylabel("H(Gaussian)")
|
| 224 |
+
axes[1].set_title("Gaussian Entropy → Target Uniform Entropy")
|
|
|
|
|
|
|
| 225 |
axes[1].grid(True, alpha=0.3)
|
| 226 |
|
| 227 |
plt.tight_layout()
|
|
|
|
| 233 |
det_reduction = (
|
| 234 |
entropy["original"]["determinant"] / entropy["transformed"]["determinant"]
|
| 235 |
)
|
| 236 |
+
target_entropy = entropy["original"]["uniform_entropy"]
|
| 237 |
+
final_entropy = entropy["transformed"]["gaussian_entropy"]
|
| 238 |
+
entropy_gap = final_entropy - target_entropy
|
| 239 |
|
| 240 |
text = f"""
|
| 241 |
TRANSFORMATION RESULTS
|
| 242 |
{'=' * 50}
|
| 243 |
|
| 244 |
+
Target Entropy (Uniform Distribution):
|
| 245 |
+
H(uniform) = {target_entropy:.6f} nats
|
|
|
|
|
|
|
| 246 |
|
| 247 |
+
This is the true entropy we want to reach.
|
| 248 |
|
| 249 |
Gaussian Entropy of Transformed Data:
|
| 250 |
+
H(Gaussian) = {final_entropy:.6f} nats
|
| 251 |
+
|
| 252 |
+
This assumes the transformed data is Gaussian with the
|
| 253 |
+
current covariance. When H(Gaussian) = H(uniform), the
|
| 254 |
+
distribution is perfectly Gaussian.
|
| 255 |
|
| 256 |
+
Gap to Target:
|
| 257 |
+
H(Gaussian) - H(uniform) = {entropy_gap:.6f} nats
|
| 258 |
+
(Should approach 0 for perfect Gaussianization)
|
| 259 |
|
| 260 |
Covariance Determinant:
|
| 261 |
Original: {entropy['original']['determinant']:.6e}
|