everydaytok committed on
Commit
220daab
·
verified ·
1 Parent(s): aa8d0d0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -126
app.py CHANGED
@@ -9,141 +9,180 @@ from sklearn.metrics.pairwise import euclidean_distances
9
 
10
  app = FastAPI()
11
 
12
class VectorConvergenceSystem:
    """Collapse a batch of vectors to one representative point plus a
    convergence score describing how well the inputs agree with it."""

    def __init__(self):
        # 128-dim vectors usually have distances in range 10-20.
        # Scales how quickly the score decays as 'effort' grows.
        # NOTE(review): this attribute previously existed but was never used;
        # the formula hard-coded 0.5.  It is now actually wired into
        # _calculate_score, with the default set to 0.5 so that scoring
        # output is unchanged.
        self.score_sensitivity = 0.5

    def _calculate_score(self, target_vector, constituent_vectors):
        """
        Calculate a score between -1.0 and 10.0.

        Logic:
          1. Distances from the result vector to all inputs used.
          2. 'Effort' = mean distance + standard deviation (the spread).
          3. Higher spread = more effort/chaos = lower score.

        An empty constituent set yields the sentinel -1.0.
        """
        if len(constituent_vectors) == 0:
            return -1.0

        # Plain Euclidean distances; numerically equivalent to
        # euclidean_distances([target], constituents)[0] without sklearn.
        diffs = np.asarray(constituent_vectors) - np.asarray(target_vector)
        dists = np.linalg.norm(diffs, axis=1)

        # 'Effort' mixes how far the points are (mean) with how scattered
        # they are (std).  Identical points => effort 0 => score 10.
        effort = np.mean(dists) + np.std(dists)

        # Formula: 10 - (effort * sensitivity), clamped to [-1, 10].
        raw_score = 10.0 - (effort * self.score_sensitivity)
        return max(-1.0, min(10.0, raw_score))

    def predict_point(self, vectors, mode='global'):
        """
        Collapse ``vectors`` to a single point.

        mode='global'  -> geometric center of ALL points.
        mode='cluster' -> center of the largest agglomerative cluster only.

        Returns (center, score, constituent_vectors).
        Raises ValueError for an unknown mode (previously fell through and
        returned None silently).
        """
        vectors = np.array(vectors)

        if mode == 'global':
            # Global: the geometric center of ALL points.
            center = np.mean(vectors, axis=0)
            score = self._calculate_score(center, vectors)
            return center, score, vectors  # every input is a constituent

        elif mode == 'cluster':
            # Cluster: find groups, pick the largest, ignore outliers.
            # AgglomerativeClustering is often more stable than DBSCAN for
            # fixed-size batches; distance_threshold bounds group spread.
            clusterer = AgglomerativeClustering(
                n_clusters=None,
                metric='euclidean',
                linkage='ward',
                distance_threshold=15.0  # tuned for 128-dim space
            )
            labels = clusterer.fit_predict(vectors)

            # Keep only the most populous cluster.
            unique_labels, counts = np.unique(labels, return_counts=True)
            largest_label = unique_labels[np.argmax(counts)]
            cluster_vectors = vectors[labels == largest_label]

            # Score reflects ONLY the chosen cluster (solution convergence).
            center = np.mean(cluster_vectors, axis=0)
            score = self._calculate_score(center, cluster_vectors)
            return center, score, cluster_vectors

        raise ValueError(f"Unknown mode: {mode!r} (expected 'global' or 'cluster')")
78
 
79
- # --- THE VISUALIZER ---
80
  def generate_plot(mode='global', scenario='split'):
81
- # 1. Generate High-Dim Data (128-dim)
82
- np.random.seed(99) # Fixed seed for consistency
83
 
84
  if scenario == 'split':
85
- # Two distinct groups far apart
86
- c1 = np.random.normal(0, 1.5, (15, 128)) # Cluster A
87
- c2 = np.random.normal(12, 1.5, (15, 128)) # Cluster B (Far away)
88
- # Add some random noise points
89
- noise = np.random.uniform(-5, 15, (5, 128))
 
 
90
  data = np.vstack([c1, c2, noise])
91
  else:
92
- # One tight group
93
- data = np.random.normal(0, 0.5, (40, 128))
94
 
95
- # 2. Run System
96
- sys = VectorConvergenceSystem()
97
  center_vec, score, used_vectors = sys.predict_point(data, mode)
98
 
99
- # 3. PCA Projection to 2D for visualization
100
- # We must fit PCA on everything including the calculated center to align coordinates
101
  pca = PCA(n_components=2)
 
 
102
 
103
- # Combine data for PCA fit
104
- combined = np.vstack([data, center_vec])
105
- projected = pca.fit_transform(combined)
106
-
107
- pts_2d = projected[:-1] # All input points
108
- center_2d = projected[-1] # The calculated center
109
-
110
- # Identify which points were used (for coloring)
111
- # We do a quick matching logic or simply rely on visual proximity for the demo
112
- # But strictly, we want to color 'used_vectors' differently.
113
- # To do this simply in 2D without complex index tracking for the demo:
114
- # We'll just plot everything blue, and draw lines to the center.
115
 
116
- plt.figure(figsize=(7, 5), facecolor='#f0f0f0')
 
117
  ax = plt.gca()
118
- ax.set_facecolor('#ffffff')
119
-
120
- # Plot all input points (faint blue)
121
- plt.scatter(pts_2d[:, 0], pts_2d[:, 1], c='gray', alpha=0.3, label='Ignored Inputs')
122
-
123
- # Re-find the indices of used_vectors in the original data to plot them specifically
124
- # (Using simple distance check for visualization mapping)
125
- used_indices = []
126
- for uv in used_vectors:
127
- # Find index in original data
128
- dists = np.linalg.norm(data - uv, axis=1)
129
- used_indices.append(np.argmin(dists))
130
 
131
- used_pts_2d = pts_2d[used_indices]
132
-
133
- # Plot used points (strong blue)
134
- plt.scatter(used_pts_2d[:, 0], used_pts_2d[:, 1], c='#007acc', alpha=0.8, label='Constituent Inputs')
135
-
136
- # Draw lines from Center to Used Points (Visualizes "Gravitation")
137
- for pt in used_pts_2d:
138
- plt.plot([center_2d[0], pt[0]], [center_2d[1], pt[1]], c='#007acc', alpha=0.1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Plot Center
141
- plt.scatter(center_2d[0], center_2d[1], c='#ff4444', s=150, marker='X', edgecolors='black', label='Generated Vector')
 
 
 
142
 
143
- plt.title(f"Mode: {mode.upper()} | Score: {score:.2f}/10\nScenario: {scenario}", fontsize=10)
144
- plt.legend(loc='best', fontsize=8)
145
- plt.grid(True, linestyle='--', alpha=0.3)
146
-
147
  buf = io.BytesIO()
148
  plt.savefig(buf, format='png', bbox_inches='tight')
149
  plt.close()
@@ -151,43 +190,42 @@ def generate_plot(mode='global', scenario='split'):
151
 
152
  @app.get("/", response_class=HTMLResponse)
153
  async def root():
154
- # Scenario 1: Split Data (Two islands)
155
- img_global_split = generate_plot('global', 'split')
156
- img_cluster_split = generate_plot('cluster', 'split')
157
-
158
- # Scenario 2: Tight Data (One clump)
159
- img_global_tight = generate_plot('global', 'tight')
160
 
161
  return f"""
162
  <html>
163
- <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:#e0e0e0; padding:20px;">
164
- <h1 style="color:#ffffff;">Vector Convergence System</h1>
165
- <p>128-Dimensional Space projected to 2D</p>
166
 
167
- <div style="background:#2a2a2a; padding:20px; border-radius:15px; margin-bottom:20px; display:inline-block;">
168
- <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario A: Scattered Data (Two Clusters)</h2>
169
- <div style="display:flex; justify-content:center; gap:20px;">
170
- <div>
171
- <h3 style="color:#66b3ff;">Global Mode</h3>
172
- <p style="font-size:0.9em; max-width:300px;">Averages everything. Result lands in the middle of nowhere (the dead zone). Low convergence score because inputs are far from result.</p>
173
- <img src="data:image/png;base64,{img_global_split}" style="border-radius:8px; border:1px solid #444;"/>
174
- </div>
175
- <div>
176
- <h3 style="color:#ff9999;">Cluster Mode</h3>
177
- <p style="font-size:0.9em; max-width:300px;">Detects the largest group. Ignores the smaller cluster and noise. High convergence score because the chosen points are tight.</p>
178
- <img src="data:image/png;base64,{img_cluster_split}" style="border-radius:8px; border:1px solid #444;"/>
 
 
 
179
  </div>
180
  </div>
181
- </div>
182
-
183
- <br/>
184
 
185
- <div style="background:#2a2a2a; padding:20px; border-radius:15px; display:inline-block;">
186
- <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario B: Converged Data</h2>
187
- <div>
188
- <h3 style="color:#99ff99;">Global Mode</h3>
189
- <p style="font-size:0.9em; max-width:300px;">Inputs are already unified. The system exerts minimal effort. Score is near max.</p>
190
- <img src="data:image/png;base64,{img_global_tight}" style="border-radius:8px; border:1px solid #444;"/>
 
 
191
  </div>
192
  </div>
193
  </body>
 
9
 
10
  app = FastAPI()
11
 
12
class AdaptiveVectorSystem:
    """Collapse a batch of vectors to one representative point, scoring how
    much 'effort' (spread) the convergence required."""

    def _calculate_score(self, target, constituents):
        """
        Generates a score (-1.0 to 10.0) representing convergence 'effort'.

        An empty constituent set yields the sentinel -1.0.
        """
        if len(constituents) == 0:
            return -1.0

        # Distances from the calculated center to the points actually used.
        dists = np.linalg.norm(constituents - target, axis=1)

        mean_dist = np.mean(dists)  # how far, typically
        std_dev = np.std(dists)     # how chaotic / scattered

        # Heuristic: high score for LOW mean AND LOW std dev.  Normalizing
        # the chaos term by mean_dist makes it scale-invariant; the epsilon
        # guards the all-identical (zero-distance) case.
        variation_coefficient = std_dev / (mean_dist + 1e-9)

        # Start from 10; penalize chaos (weighted 5x) plus raw distance.
        penalty = (variation_coefficient * 5.0) + (mean_dist * 0.1)
        score = 10.0 - penalty
        return max(-1.0, min(10.0, score))

    def predict_point(self, vectors, mode='global'):
        """
        mode='global'  -> mean of everything ("force a fit for everyone");
                          great for converged data, terrible for split data.
        mode='cluster' -> mean of the strongest cluster, found via a
                          dynamically derived distance threshold.

        Returns (center, score, constituent_vectors).
        Raises ValueError on an unknown mode (previously fell through and
        returned None silently).
        """
        data = np.array(vectors)

        # --- GLOBAL MODE ---
        if mode == 'global':
            center = np.mean(data, axis=0)
            score = self._calculate_score(center, data)
            return center, score, data

        # --- CLUSTER MODE ---
        # "Find the strongest gravity well."
        elif mode == 'cluster':
            # Fewer than 3 points can never form a valid (size > 2) cluster,
            # and a single point would make the average spacing NaN and
            # crash the clusterer -- fall back to the global answer.
            if len(data) < 3:
                return self.predict_point(data, mode='global')

            # 1. Pairwise distances establish the "scale" of the data.
            dist_matrix = euclidean_distances(data, data)
            # Upper triangle (k=1) drops self-distances and duplicates.
            all_dists = dist_matrix[np.triu_indices(len(data), k=1)]
            avg_global_dist = np.mean(all_dists)

            # 2. DYNAMIC THRESHOLDING: to belong to a group, points must be
            # significantly closer than the global average spacing.
            dynamic_thresh = avg_global_dist * 0.65

            # 3. Cluster with this dynamic threshold.
            clusterer = AgglomerativeClustering(
                n_clusters=None,
                metric='euclidean',
                linkage='ward',
                distance_threshold=dynamic_thresh
            )
            labels = clusterer.fit_predict(data)

            # 4. Find the "best" cluster: the largest one, ignoring noise
            # (clusters of size 1 or 2).
            unique_labels, counts = np.unique(labels, return_counts=True)
            sizes = dict(zip(unique_labels, counts))
            valid_clusters = [label for label, size in sizes.items() if size > 2]

            if not valid_clusters:
                # Everything is noise: treat the whole batch as one group.
                return self.predict_point(data, mode='global')

            # Largest of the valid clusters is usually the safest choice
            # (picking the densest is an alternative).
            best_label = max(valid_clusters, key=sizes.get)

            # 5. Extract the winning cluster and summarize it.
            cluster_vectors = data[labels == best_label]
            center = np.mean(cluster_vectors, axis=0)
            score = self._calculate_score(center, cluster_vectors)
            return center, score, cluster_vectors

        raise ValueError(f"Unknown mode: {mode!r} (expected 'global' or 'cluster')")
96
 
97
+ # --- VISUALIZATION LOGIC ---
98
  def generate_plot(mode='global', scenario='split'):
99
+ # Generate 128-dimension vectors
100
+ np.random.seed(42) # Consistent seed for demo
101
 
102
  if scenario == 'split':
103
+ # Create two dense islands far apart
104
+ # Island 1: centered at 0
105
+ c1 = np.random.normal(0, 0.5, (20, 128))
106
+ # Island 2: centered at 8 (in 128D the centers are approx sqrt(128*64) = ~90 units apart)
107
+ c2 = np.random.normal(8, 0.5, (20, 128))
108
+ # Noise: Random scatter
109
+ noise = np.random.uniform(-5, 15, (10, 128))
110
  data = np.vstack([c1, c2, noise])
111
  else:
112
+ # One Tight Cluster
113
+ data = np.random.normal(0, 1.0, (50, 128))
114
 
115
+ # Run System
116
+ sys = AdaptiveVectorSystem()
117
  center_vec, score, used_vectors = sys.predict_point(data, mode)
118
 
119
+ # PCA for 2D View
120
+ # Important: Fit PCA on Input + Center so they share the same coordinate space
121
  pca = PCA(n_components=2)
122
+ all_points = np.vstack([data, center_vec])
123
+ projected = pca.fit_transform(all_points)
124
 
125
+ pts_2d = projected[:-1]
126
+ center_2d = projected[-1]
 
 
 
 
 
 
 
 
 
 
127
 
128
+ # --- Plotting ---
129
+ plt.figure(figsize=(7, 5), facecolor='#202020')
130
  ax = plt.gca()
131
+ ax.set_facecolor('#303030')
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ # Logic to identify which points were used (for coloring)
134
+ # We compare the rows of 'used_vectors' to 'data' to find indices
135
+ # Note: In production, pass indices around. For demo, we do a quick check.
136
+ is_used = np.zeros(len(data), dtype=bool)
137
+
138
+ # A quick way to mask used vectors using broadcasting approximation
139
+ # (Since floats are tricky, we assume exact match from the split)
140
+ if mode == 'global':
141
+ is_used[:] = True
142
+ else:
143
+ # Brute force match for visualization accuracy
144
+ for uv in used_vectors:
145
+ for i, dv in enumerate(data):
146
+ if np.array_equal(uv, dv):
147
+ is_used[i] = True
148
+ break
149
+
150
+ # 1. Plot IGNORED points (Grey, transparent)
151
+ if not np.all(is_used):
152
+ plt.scatter(pts_2d[~is_used, 0], pts_2d[~is_used, 1],
153
+ c='#555555', alpha=0.3, s=30, label='Ignored (Noise/Other)')
154
+
155
+ # 2. Plot USED points (Bright Cyan)
156
+ plt.scatter(pts_2d[is_used, 0], pts_2d[is_used, 1],
157
+ c='#00e5ff', alpha=0.8, s=40, edgecolors='none', label='Constituent Inputs')
158
+
159
+ # 3. Draw "Gravity Lines" (faint lines from used points to center)
160
+ # Only draw lines if there aren't too many points, to keep it clean
161
+ if np.sum(is_used) < 100:
162
+ for pt in pts_2d[is_used]:
163
+ plt.plot([pt[0], center_2d[0]], [pt[1], center_2d[1]],
164
+ c='#00e5ff', alpha=0.15, linewidth=1)
165
+
166
+ # 4. Plot The PREDICTED POINT (Red X)
167
+ plt.scatter(center_2d[0], center_2d[1],
168
+ c='#ff3366', s=200, marker='X', edgecolors='white', linewidth=1.5,
169
+ label='Generated Vector', zorder=10)
170
+
171
+ # Styling
172
+ plt.title(f"Mode: {mode.upper()} | Score: {score:.2f}/10", color='white', fontsize=12, pad=10)
173
+ plt.grid(True, color='#444444', linestyle='--', alpha=0.5)
174
+
175
+ # Legend formatting
176
+ leg = plt.legend(facecolor='#303030', edgecolor='#555555', fontsize=8, loc='best')
177
+ for text in leg.get_texts():
178
+ text.set_color("white")
179
 
180
+ # Axis colors
181
+ ax.tick_params(axis='x', colors='white')
182
+ ax.tick_params(axis='y', colors='white')
183
+ for spine in ax.spines.values():
184
+ spine.set_edgecolor('#555555')
185
 
 
 
 
 
186
  buf = io.BytesIO()
187
  plt.savefig(buf, format='png', bbox_inches='tight')
188
  plt.close()
 
190
 
191
  @app.get("/", response_class=HTMLResponse)
192
  async def root():
193
+ img_global = generate_plot('global', 'split')
194
+ img_cluster = generate_plot('cluster', 'split')
195
+ img_tight = generate_plot('global', 'tight')
 
 
 
196
 
197
  return f"""
198
  <html>
199
+ <body style="font-family: 'Segoe UI', sans-serif; background:#121212; color:#e0e0e0; text-align:center; padding:20px;">
200
+ <h1 style="margin-bottom:10px;">Vector Convergence System</h1>
201
+ <p style="color:#888; margin-bottom:40px;">Dynamic Thresholding Algorithm</p>
202
 
203
+ <div style="display:flex; flex-wrap:wrap; justify-content:center; gap:20px;">
204
+ <!-- SCENARIO A -->
205
+ <div style="background:#1e1e1e; padding:20px; border-radius:12px; border:1px solid #333;">
206
+ <h2 style="color:#aaa; border-bottom:1px solid #333; padding-bottom:10px;">Scenario: Split Data</h2>
207
+ <div style="display:flex; gap:20px;">
208
+ <div>
209
+ <h3 style="color:#00e5ff;">Global Mode</h3>
210
+ <div style="font-size:0.8em; color:#888; margin-bottom:5px;">Averages everything (Score -1 to 2)</div>
211
+ <img src="data:image/png;base64,{img_global}" width="400" style="border-radius:8px;"/>
212
+ </div>
213
+ <div>
214
+ <h3 style="color:#ff3366;">Cluster Mode (Revised)</h3>
215
+ <div style="font-size:0.8em; color:#888; margin-bottom:5px;">Identifies largest mass (Score 8 to 10)</div>
216
+ <img src="data:image/png;base64,{img_cluster}" width="400" style="border-radius:8px;"/>
217
+ </div>
218
  </div>
219
  </div>
 
 
 
220
 
221
+ <!-- SCENARIO B -->
222
+ <div style="background:#1e1e1e; padding:20px; border-radius:12px; border:1px solid #333;">
223
+ <h2 style="color:#aaa; border-bottom:1px solid #333; padding-bottom:10px;">Scenario: Converged Data</h2>
224
+ <div>
225
+ <h3 style="color:#00e5ff;">Global Mode</h3>
226
+ <div style="font-size:0.8em; color:#888; margin-bottom:5px;">Efficient calculation (Score ~10)</div>
227
+ <img src="data:image/png;base64,{img_tight}" width="400" style="border-radius:8px;"/>
228
+ </div>
229
  </div>
230
  </div>
231
  </body>