Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,85 +3,193 @@ import matplotlib.pyplot as plt
|
|
| 3 |
import io, base64
|
| 4 |
from fastapi import FastAPI
|
| 5 |
from fastapi.responses import HTMLResponse
|
| 6 |
-
from pydantic import BaseModel
|
| 7 |
from sklearn.decomposition import PCA
|
| 8 |
-
from sklearn.cluster import
|
|
|
|
| 9 |
|
| 10 |
app = FastAPI()
|
| 11 |
|
| 12 |
-
class
|
| 13 |
-
def
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
|
| 27 |
# --- THE VISUALIZER ---
|
| 28 |
def generate_plot(mode='global', scenario='split'):
|
| 29 |
# 1. Generate High-Dim Data (128-dim)
|
| 30 |
-
np.random.seed(
|
|
|
|
| 31 |
if scenario == 'split':
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
| 35 |
else:
|
| 36 |
-
|
|
|
|
| 37 |
|
| 38 |
-
# 2.
|
| 39 |
-
sys =
|
| 40 |
-
center_vec,
|
| 41 |
|
| 42 |
-
# 3. PCA Projection to 2D
|
|
|
|
| 43 |
pca = PCA(n_components=2)
|
| 44 |
-
all_points = np.vstack([data, center_vec])
|
| 45 |
-
projected = pca.fit_transform(all_points)
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
buf = io.BytesIO()
|
| 58 |
-
plt.savefig(buf, format='png')
|
| 59 |
plt.close()
|
| 60 |
return base64.b64encode(buf.getvalue()).decode('utf-8')
|
| 61 |
|
| 62 |
@app.get("/", response_class=HTMLResponse)
|
| 63 |
async def root():
|
| 64 |
-
#
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
return f"""
|
| 69 |
<html>
|
| 70 |
-
<body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:
|
| 71 |
-
<h1>Vector Convergence
|
| 72 |
-
<
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
<div>
|
| 79 |
-
<h3>
|
| 80 |
-
<p>
|
| 81 |
-
<img src="data:image/png;base64,{
|
| 82 |
</div>
|
| 83 |
</div>
|
| 84 |
-
<p style="margin-top:30px;">This demonstrates 128-dimensional relationships projected into 2D.</p>
|
| 85 |
</body>
|
| 86 |
</html>
|
| 87 |
"""
|
|
|
|
| 3 |
import io, base64
|
| 4 |
from fastapi import FastAPI
|
| 5 |
from fastapi.responses import HTMLResponse
|
|
|
|
| 6 |
from sklearn.decomposition import PCA
|
| 7 |
+
from sklearn.cluster import AgglomerativeClustering
|
| 8 |
+
from sklearn.metrics.pairwise import euclidean_distances
|
| 9 |
|
| 10 |
app = FastAPI()
|
| 11 |
|
| 12 |
+
class VectorConvergenceSystem:
    """Finds a consensus ("converged") point for a batch of 128-dim vectors
    and scores how tightly the inputs agree with that point.

    Two strategies are supported by predict_point():
      * 'global'  — geometric mean of every input vector.
      * 'cluster' — mean of the largest agglomerative cluster only,
                    ignoring outliers and minority groups.
    """

    def __init__(self):
        # 128-dim vectors usually have distances in range 10-20.
        # Multiplier applied to the 'effort' term when scoring.
        # NOTE: 0.5 matches the constant previously hard-coded inside
        # _calculate_score (the attribute was defined but never used).
        self.score_sensitivity = 0.5

    def _calculate_score(self, target_vector, constituent_vectors):
        """Calculate a convergence score between -1 and 10.

        Logic:
        1. Calculate distances from the result vector to all inputs used.
        2. Calculate the 'effort' (mean distance + standard deviation).
        3. Higher effort = more scatter/chaos = lower score.

        Returns -1.0 when there are no constituent vectors.
        """
        if len(constituent_vectors) == 0:
            return -1.0

        # Euclidean distance from the target to each constituent.
        # Plain NumPy — no need for sklearn's pairwise helper here.
        dists = np.linalg.norm(
            np.asarray(constituent_vectors) - np.asarray(target_vector), axis=1
        )

        # 'Effort' mixes how far the inputs are (mean) and how scattered
        # they are (std).
        effort = np.mean(dists) + np.std(dists)

        # If effort is 0 (identical points) the score is a perfect 10;
        # it drops linearly with effort, scaled by the sensitivity.
        raw_score = 10.0 - (effort * self.score_sensitivity)

        # Clamp between -1 and 10.
        return max(-1.0, min(10.0, raw_score))

    def predict_point(self, vectors, mode='global'):
        """Compute a consensus vector for `vectors`.

        Parameters
        ----------
        vectors : array-like, shape (n_samples, n_dims)
            The input batch.
        mode : str
            'global' (average everything) or 'cluster' (largest cluster only).

        Returns
        -------
        (center, score, constituents) : the consensus vector, its
        convergence score, and the subset of inputs it was built from.

        Raises
        ------
        ValueError if `mode` is not recognised.
        """
        vectors = np.array(vectors)

        if mode == 'global':
            # Global: the geometric center of ALL points.
            center = np.mean(vectors, axis=0)
            score = self._calculate_score(center, vectors)
            return center, score, vectors  # all vectors are constituents

        elif mode == 'cluster':
            # Cluster: find groups, pick the largest, ignore outliers.
            # AgglomerativeClustering is often more stable than DBSCAN for
            # fixed-size batches; distance_threshold controls how far apart
            # points may be while still joining the same group.
            clusterer = AgglomerativeClustering(
                n_clusters=None,
                metric='euclidean',
                linkage='ward',
                distance_threshold=15.0  # Adjusted for 128-dim space
            )
            labels = clusterer.fit_predict(vectors)

            # Find the largest cluster.
            unique_labels, counts = np.unique(labels, return_counts=True)
            largest_label = unique_labels[np.argmax(counts)]

            # Keep only the vectors belonging to that cluster.
            cluster_vectors = vectors[labels == largest_label]

            # Center and score are based ONLY on the chosen cluster
            # (convergence of the selected solution).
            center = np.mean(cluster_vectors, axis=0)
            score = self._calculate_score(center, cluster_vectors)

            return center, score, cluster_vectors

        # Previously an unknown mode fell through and returned None,
        # which crashed callers at unpack time; fail loudly instead.
        raise ValueError(f"Unknown mode: {mode!r} (expected 'global' or 'cluster')")
| 78 |
|
| 79 |
# --- THE VISUALIZER ---
def generate_plot(mode='global', scenario='split'):
    """Render one demo plot and return it as a base64-encoded PNG string.

    Parameters
    ----------
    mode : str
        'global' or 'cluster', forwarded to
        VectorConvergenceSystem.predict_point().
    scenario : str
        'split' generates two distant clusters plus noise; any other value
        generates a single tight clump.
    """
    # 1. Generate high-dim data (128-dim).
    # NOTE(review): this seeds the *global* NumPy RNG for a deterministic
    # demo; switching to a local default_rng would change the generated
    # values (and the rendered plot), so the seed is kept as-is.
    np.random.seed(99)  # Fixed seed for consistency

    if scenario == 'split':
        # Two distinct groups far apart, plus a few uniform noise points.
        group_a = np.random.normal(0, 1.5, (15, 128))
        group_b = np.random.normal(12, 1.5, (15, 128))  # far from group_a
        noise = np.random.uniform(-5, 15, (5, 128))
        data = np.vstack([group_a, group_b, noise])
    else:
        # One tight group.
        data = np.random.normal(0, 0.5, (40, 128))

    # 2. Run the convergence system.
    # (Renamed from 'sys' so the local no longer shadows the stdlib
    # 'sys' module name.)
    system = VectorConvergenceSystem()
    center_vec, score, used_vectors = system.predict_point(data, mode)

    # 3. PCA projection to 2D for visualization.
    # PCA must be fitted on the inputs AND the computed center together so
    # that everything shares one coordinate frame.
    pca = PCA(n_components=2)
    combined = np.vstack([data, center_vec])
    projected = pca.fit_transform(combined)

    pts_2d = projected[:-1]    # all input points
    center_2d = projected[-1]  # the calculated center

    plt.figure(figsize=(7, 5), facecolor='#f0f0f0')
    ax = plt.gca()
    ax.set_facecolor('#ffffff')

    # All inputs, faint gray — the "ignored" baseline layer.
    plt.scatter(pts_2d[:, 0], pts_2d[:, 1], c='gray', alpha=0.3, label='Ignored Inputs')

    # Map each used vector back to its row in `data` via nearest neighbour
    # so the matching projected points can be highlighted.
    used_indices = [int(np.argmin(np.linalg.norm(data - uv, axis=1))) for uv in used_vectors]
    used_pts_2d = pts_2d[used_indices]

    # Constituent inputs, strong blue.
    plt.scatter(used_pts_2d[:, 0], used_pts_2d[:, 1], c='#007acc', alpha=0.8, label='Constituent Inputs')

    # Faint lines from the center to each used point (visualizes "gravitation").
    for pt in used_pts_2d:
        plt.plot([center_2d[0], pt[0]], [center_2d[1], pt[1]], c='#007acc', alpha=0.1)

    # The generated/consensus vector.
    plt.scatter(center_2d[0], center_2d[1], c='#ff4444', s=150, marker='X', edgecolors='black', label='Generated Vector')

    plt.title(f"Mode: {mode.upper()} | Score: {score:.2f}/10\nScenario: {scenario}", fontsize=10)
    plt.legend(loc='best', fontsize=8)
    plt.grid(True, linestyle='--', alpha=0.3)

    # Encode the figure as an inline-embeddable base64 PNG.
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')
|
| 151 |
|
| 152 |
@app.get("/", response_class=HTMLResponse)
async def root():
    # Landing page: renders three pre-computed demo plots as inline
    # base64 PNGs so the page is fully self-contained (no static files).
    # NOTE(review): generate_plot does CPU-bound matplotlib work but runs
    # synchronously inside this async handler, blocking the event loop —
    # consider a plain `def` route or run_in_executor; confirm with load test.

    # Scenario 1: Split Data (Two islands)
    img_global_split = generate_plot('global', 'split')
    img_cluster_split = generate_plot('cluster', 'split')

    # Scenario 2: Tight Data (One clump)
    img_global_tight = generate_plot('global', 'tight')

    return f"""
    <html>
    <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:#e0e0e0; padding:20px;">
        <h1 style="color:#ffffff;">Vector Convergence System</h1>
        <p>128-Dimensional Space projected to 2D</p>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; margin-bottom:20px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario A: Scattered Data (Two Clusters)</h2>
            <div style="display:flex; justify-content:center; gap:20px;">
                <div>
                    <h3 style="color:#66b3ff;">Global Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Averages everything. Result lands in the middle of nowhere (the dead zone). Low convergence score because inputs are far from result.</p>
                    <img src="data:image/png;base64,{img_global_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
                <div>
                    <h3 style="color:#ff9999;">Cluster Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Detects the largest group. Ignores the smaller cluster and noise. High convergence score because the chosen points are tight.</p>
                    <img src="data:image/png;base64,{img_cluster_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
            </div>
        </div>

        <br/>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario B: Converged Data</h2>
            <div>
                <h3 style="color:#99ff99;">Global Mode</h3>
                <p style="font-size:0.9em; max-width:300px;">Inputs are already unified. The system exerts minimal effort. Score is near max.</p>
                <img src="data:image/png;base64,{img_global_tight}" style="border-radius:8px; border:1px solid #444;"/>
            </div>
        </div>
    </body>
    </html>
    """
|