everydaytok committed on
Commit
aa8d0d0
·
verified ·
1 Parent(s): 77dbd65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -51
app.py CHANGED
@@ -3,85 +3,193 @@ import matplotlib.pyplot as plt
3
  import io, base64
4
  from fastapi import FastAPI
5
  from fastapi.responses import HTMLResponse
6
- from pydantic import BaseModel
7
  from sklearn.decomposition import PCA
8
- from sklearn.cluster import DBSCAN
 
9
 
10
  app = FastAPI()
11
 
12
class VectorSystem:
    """Derives a single representative 'center' vector from a batch of vectors."""

    def get_center(self, data, mode='cluster'):
        """Return (center_vector, label).

        label is "Clustered" when a DBSCAN cluster supplied the center,
        "Global" when the plain mean of all rows was used instead.
        """
        if mode == 'cluster':
            labels = DBSCAN(eps=0.5, min_samples=3).fit(data).labels_
            # DBSCAN marks noise as -1; keep only real cluster ids.
            real_clusters = set(labels) - {-1}
            if real_clusters:
                # Pick the most populous non-noise cluster and average it.
                biggest = max(real_clusters, key=lambda lbl: np.sum(labels == lbl))
                members = data[labels == biggest]
                return np.mean(members, axis=0), "Clustered"
        # Fallback (and 'global' mode): mean of everything.
        return np.mean(data, axis=0), "Global"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
# --- THE VISUALIZER ---
def generate_plot(mode='global', scenario='split'):
    """Render one demo scatter plot and return it as a base64-encoded PNG.

    mode: forwarded to VectorSystem.get_center ('global' or 'cluster').
    scenario: 'split' builds two gaussian clumps in 128-D; any other value
        builds a single tight clump.
    """
    # 1. Generate High-Dim Data (128-dim)
    # Fixed seed so every request renders the identical picture.
    np.random.seed(42)
    if scenario == 'split':
        c1 = np.random.normal(0, 0.1, (20, 128))
        c2 = np.random.normal(1, 0.1, (20, 128))
        data = np.vstack([c1, c2])
    else:
        data = np.random.normal(0.5, 0.05, (40, 128))

    # 2. Find Center
    sys = VectorSystem()
    center_vec, label = sys.get_center(data, mode)

    # 3. PCA Projection to 2D
    # The center is stacked onto the data BEFORE fitting so data and center
    # land in the same 2-D coordinate frame.
    pca = PCA(n_components=2)
    all_points = np.vstack([data, center_vec])
    projected = pca.fit_transform(all_points)

    pts_2d = projected[:-1]
    center_2d = projected[-1]

    # 4. Plotting
    plt.figure(figsize=(6, 4))
    plt.scatter(pts_2d[:, 0], pts_2d[:, 1], alpha=0.5, c='blue', label='Input Vectors')
    plt.scatter(center_2d[0], center_2d[1], c='red', s=100, marker='X', label=f'Predicted {label} Center')
    plt.title(f"Space Visualization ({scenario.capitalize()} Data)")
    plt.legend()

    # Serialize the current figure to PNG bytes, then close it so repeated
    # requests do not accumulate open matplotlib figures.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')
61
 
62
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a page comparing 'global' vs 'cluster' centering on split data."""
    # Show two scenarios side-by-side
    img_global = generate_plot('global', 'split')
    img_cluster = generate_plot('cluster', 'split')

    return f"""
    <html>
    <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:white;">
        <h1>Vector Convergence Visualizer</h1>
        <div style="display:flex; justify-content:center; gap:20px;">
            <div>
                <h3>Global Mode</h3>
                <p>Lands in the "dead zone" between clusters.</p>
                <img src="data:image/png;base64,{img_global}" style="border-radius:10px;"/>
            </div>
            <div>
                <h3>Cluster Mode</h3>
                <p>Detects the density and "gravitates" to one group.</p>
                <img src="data:image/png;base64,{img_cluster}" style="border-radius:10px;"/>
            </div>
        </div>
        <p style="margin-top:30px;">This demonstrates 128-dimensional relationships projected into 2D.</p>
    </body>
    </html>
    """
 
3
  import io, base64
4
  from fastapi import FastAPI
5
  from fastapi.responses import HTMLResponse
 
6
  from sklearn.decomposition import PCA
7
+ from sklearn.cluster import AgglomerativeClustering
8
+ from sklearn.metrics.pairwise import euclidean_distances
9
 
10
  app = FastAPI()
11
 
12
class VectorConvergenceSystem:
    """Picks a representative 'center' vector for a batch of input vectors
    and scores how tightly the inputs converge around that center."""

    def __init__(self):
        # Scaling factor applied to 'effort' when converting it to a score.
        # NOTE(review): the original declared a sensitivity of 2.0 but the
        # formula hard-coded 0.5, so the attribute was dead and contradicted
        # its own comment. It now holds the value the formula actually uses,
        # keeping scores identical while removing the inconsistency.
        self.score_sensitivity = 0.5

    def _calculate_score(self, target_vector, constituent_vectors):
        """
        Calculates a score between -1 and 10.
        Logic:
        1. Calculate distances from result vector to all inputs used.
        2. Calculate the 'spread' (standard deviation + mean distance).
        3. Higher spread = More effort/chaos = Lower score.

        Returns -1.0 when there are no constituent vectors at all.
        """
        if len(constituent_vectors) == 0:
            return -1.0

        # Euclidean distance from the result vector to every constituent.
        # Plain NumPy is numerically equivalent to sklearn's
        # euclidean_distances here and keeps the scoring path dependency-free.
        pts = np.asarray(constituent_vectors, dtype=float)
        dists = np.linalg.norm(pts - np.asarray(target_vector, dtype=float), axis=1)

        # 'Effort' is a mix of how far they are (mean) and how scattered they are (std)
        effort = np.mean(dists) + np.std(dists)

        # Map logic:
        # If effort is 0 (identical points), score = 10.
        # If effort is high, score drops linearly.
        # Formula: 10 - (effort * sensitivity)
        raw_score = 10.0 - (effort * self.score_sensitivity)

        # Clamp between -1 and 10
        return max(-1.0, min(10.0, raw_score))

    def predict_point(self, vectors, mode='global'):
        """Return (center, score, constituent_vectors) for the given mode.

        mode='global': center is the mean of ALL vectors.
        mode='cluster': center is the mean of the largest agglomerative
            cluster; outliers/smaller groups are excluded from the score.
        Any other mode falls through and returns None (unchanged behavior).
        """
        vectors = np.array(vectors)

        if mode == 'global':
            # Global: The geometric center of ALL points
            center = np.mean(vectors, axis=0)
            score = self._calculate_score(center, vectors)
            return center, score, vectors  # Return all vectors as constituents

        elif mode == 'cluster':
            # Cluster: Find groups, pick the largest/densest, ignore outliers
            # Using AgglomerativeClustering is often more stable than DBSCAN for fixed-size batches
            # distance_threshold determines how far points can be to be same group
            clusterer = AgglomerativeClustering(
                n_clusters=None,
                metric='euclidean',
                linkage='ward',
                distance_threshold=15.0,  # Adjusted for 128-dim space
            )
            labels = clusterer.fit_predict(vectors)

            # Find largest cluster
            unique_labels, counts = np.unique(labels, return_counts=True)
            largest_label = unique_labels[np.argmax(counts)]

            # Filter vectors belonging to this cluster
            cluster_vectors = vectors[labels == largest_label]

            # Calculate center of just this cluster
            center = np.mean(cluster_vectors, axis=0)

            # Score is based ONLY on the vectors in the cluster (convergence of the solution)
            score = self._calculate_score(center, cluster_vectors)

            return center, score, cluster_vectors
78
 
79
# --- THE VISUALIZER ---
def generate_plot(mode='global', scenario='split'):
    """Render one scenario/mode combination and return it as a base64 PNG."""
    # 1. Synthesize 128-dimensional demo data.
    np.random.seed(99)  # Fixed seed for consistency

    if scenario == 'split':
        # Two well-separated gaussian groups plus a few uniform stray points.
        group_a = np.random.normal(0, 1.5, (15, 128))
        group_b = np.random.normal(12, 1.5, (15, 128))
        strays = np.random.uniform(-5, 15, (5, 128))
        data = np.vstack([group_a, group_b, strays])
    else:
        # A single tight clump.
        data = np.random.normal(0, 0.5, (40, 128))

    # 2. Ask the convergence system for a center, score, and the vectors used.
    system = VectorConvergenceSystem()
    center_vec, score, used_vectors = system.predict_point(data, mode)

    # 3. Project inputs AND the computed center through a single PCA fit so
    #    they all land in the same 2-D coordinate frame.
    projector = PCA(n_components=2)
    projected = projector.fit_transform(np.vstack([data, center_vec]))
    pts_2d, center_2d = projected[:-1], projected[-1]

    plt.figure(figsize=(7, 5), facecolor='#f0f0f0')
    plt.gca().set_facecolor('#ffffff')

    # Faint layer first: every input point.
    plt.scatter(pts_2d[:, 0], pts_2d[:, 1], c='gray', alpha=0.3, label='Ignored Inputs')

    # Map each constituent vector back to its row in `data` via nearest
    # distance, so the points the system actually used can be highlighted.
    used_indices = [
        int(np.argmin(np.linalg.norm(data - uv, axis=1))) for uv in used_vectors
    ]
    used_pts_2d = pts_2d[used_indices]

    # Strong layer: the constituent points.
    plt.scatter(used_pts_2d[:, 0], used_pts_2d[:, 1], c='#007acc', alpha=0.8, label='Constituent Inputs')

    # Faint spokes from the center to each used point ("gravitation").
    for px, py in used_pts_2d:
        plt.plot([center_2d[0], px], [center_2d[1], py], c='#007acc', alpha=0.1)

    # The predicted center itself, drawn on top.
    plt.scatter(center_2d[0], center_2d[1], c='#ff4444', s=150, marker='X', edgecolors='black', label='Generated Vector')

    plt.title(f"Mode: {mode.upper()} | Score: {score:.2f}/10\nScenario: {scenario}", fontsize=10)
    plt.legend(loc='best', fontsize=8)
    plt.grid(True, linestyle='--', alpha=0.3)

    # Serialize to PNG and close the figure so requests don't leak figures.
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')
151
 
152
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the demo page: split-data comparison plus a tight-data panel."""
    # Scenario 1: Split Data (Two islands)
    img_global_split = generate_plot('global', 'split')
    img_cluster_split = generate_plot('cluster', 'split')

    # Scenario 2: Tight Data (One clump)
    img_global_tight = generate_plot('global', 'tight')

    return f"""
    <html>
    <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:#e0e0e0; padding:20px;">
        <h1 style="color:#ffffff;">Vector Convergence System</h1>
        <p>128-Dimensional Space projected to 2D</p>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; margin-bottom:20px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario A: Scattered Data (Two Clusters)</h2>
            <div style="display:flex; justify-content:center; gap:20px;">
                <div>
                    <h3 style="color:#66b3ff;">Global Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Averages everything. Result lands in the middle of nowhere (the dead zone). Low convergence score because inputs are far from result.</p>
                    <img src="data:image/png;base64,{img_global_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
                <div>
                    <h3 style="color:#ff9999;">Cluster Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Detects the largest group. Ignores the smaller cluster and noise. High convergence score because the chosen points are tight.</p>
                    <img src="data:image/png;base64,{img_cluster_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
            </div>
        </div>

        <br/>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario B: Converged Data</h2>
            <div>
                <h3 style="color:#99ff99;">Global Mode</h3>
                <p style="font-size:0.9em; max-width:300px;">Inputs are already unified. The system exerts minimal effort. Score is near max.</p>
                <img src="data:image/png;base64,{img_global_tight}" style="border-radius:8px; border:1px solid #444;"/>
            </div>
        </div>
    </body>
    </html>
    """