everydaytok committed on
Commit
aa8d0d0
·
verified ·
1 Parent(s): 77dbd65

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -51
app.py CHANGED
@@ -3,85 +3,193 @@ import matplotlib.pyplot as plt
3
  import io, base64
4
  from fastapi import FastAPI
5
  from fastapi.responses import HTMLResponse
6
- from pydantic import BaseModel
7
  from sklearn.decomposition import PCA
8
- from sklearn.cluster import DBSCAN
 
9
 
10
  app = FastAPI()
11
 
12
class VectorSystem:
    """Derives a single representative 'center' vector from a batch of vectors."""

    def get_center(self, data, mode='cluster'):
        """Return (center_vector, label).

        label is "Clustered" when a DBSCAN cluster supplied the center,
        "Global" when the plain mean of all rows was used instead.
        """
        if mode == 'cluster':
            labels = DBSCAN(eps=0.5, min_samples=3).fit(data).labels_
            # DBSCAN marks noise as -1; keep only real cluster ids.
            real_clusters = set(labels) - {-1}
            if real_clusters:
                # Pick the most populous non-noise cluster and average it.
                biggest = max(real_clusters, key=lambda lbl: np.sum(labels == lbl))
                members = data[labels == biggest]
                return np.mean(members, axis=0), "Clustered"
        # Fallback (and 'global' mode): mean of everything.
        return np.mean(data, axis=0), "Global"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
# --- THE VISUALIZER ---
def generate_plot(mode='global', scenario='split'):
    """Render one demo scatter plot and return it as a base64-encoded PNG.

    mode: forwarded to VectorSystem.get_center ('global' or 'cluster').
    scenario: 'split' builds two gaussian clumps in 128-D; any other value
        builds a single tight clump.
    """
    # 1. Generate High-Dim Data (128-dim)
    # Fixed seed so every request renders the identical picture.
    np.random.seed(42)
    if scenario == 'split':
        c1 = np.random.normal(0, 0.1, (20, 128))
        c2 = np.random.normal(1, 0.1, (20, 128))
        data = np.vstack([c1, c2])
    else:
        data = np.random.normal(0.5, 0.05, (40, 128))

    # 2. Find Center
    sys = VectorSystem()
    center_vec, label = sys.get_center(data, mode)

    # 3. PCA Projection to 2D
    # The center is stacked onto the data BEFORE fitting so data and center
    # land in the same 2-D coordinate frame.
    pca = PCA(n_components=2)
    all_points = np.vstack([data, center_vec])
    projected = pca.fit_transform(all_points)

    pts_2d = projected[:-1]
    center_2d = projected[-1]

    # 4. Plotting
    plt.figure(figsize=(6, 4))
    plt.scatter(pts_2d[:, 0], pts_2d[:, 1], alpha=0.5, c='blue', label='Input Vectors')
    plt.scatter(center_2d[0], center_2d[1], c='red', s=100, marker='X', label=f'Predicted {label} Center')
    plt.title(f"Space Visualization ({scenario.capitalize()} Data)")
    plt.legend()

    # Serialize the current figure to PNG bytes, then close it so repeated
    # requests do not accumulate open matplotlib figures.
    buf = io.BytesIO()
    plt.savefig(buf, format='png')
    plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')
61
 
62
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve a page comparing 'global' vs 'cluster' centering on split data."""
    # Show two scenarios side-by-side
    img_global = generate_plot('global', 'split')
    img_cluster = generate_plot('cluster', 'split')

    return f"""
    <html>
    <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:white;">
        <h1>Vector Convergence Visualizer</h1>
        <div style="display:flex; justify-content:center; gap:20px;">
            <div>
                <h3>Global Mode</h3>
                <p>Lands in the "dead zone" between clusters.</p>
                <img src="data:image/png;base64,{img_global}" style="border-radius:10px;"/>
            </div>
            <div>
                <h3>Cluster Mode</h3>
                <p>Detects the density and "gravitates" to one group.</p>
                <img src="data:image/png;base64,{img_cluster}" style="border-radius:10px;"/>
            </div>
        </div>
        <p style="margin-top:30px;">This demonstrates 128-dimensional relationships projected into 2D.</p>
    </body>
    </html>
    """
 
3
  import io, base64
4
  from fastapi import FastAPI
5
  from fastapi.responses import HTMLResponse
 
6
  from sklearn.decomposition import PCA
7
+ from sklearn.cluster import AgglomerativeClustering
8
+ from sklearn.metrics.pairwise import euclidean_distances
9
 
10
  app = FastAPI()
11
 
12
class VectorConvergenceSystem:
    """Picks a representative 'center' vector for a batch of input vectors
    and scores how tightly the inputs converge around that center."""

    def __init__(self):
        # Scaling factor applied to 'effort' when converting it to a score.
        # NOTE(review): the original declared a sensitivity of 2.0 but the
        # formula hard-coded 0.5, so the attribute was dead and contradicted
        # its own comment. It now holds the value the formula actually uses,
        # keeping scores identical while removing the inconsistency.
        self.score_sensitivity = 0.5

    def _calculate_score(self, target_vector, constituent_vectors):
        """
        Calculates a score between -1 and 10.
        Logic:
        1. Calculate distances from result vector to all inputs used.
        2. Calculate the 'spread' (standard deviation + mean distance).
        3. Higher spread = More effort/chaos = Lower score.

        Returns -1.0 when there are no constituent vectors at all.
        """
        if len(constituent_vectors) == 0:
            return -1.0

        # Euclidean distance from the result vector to every constituent.
        # Plain NumPy is numerically equivalent to sklearn's
        # euclidean_distances here and keeps the scoring path dependency-free.
        pts = np.asarray(constituent_vectors, dtype=float)
        dists = np.linalg.norm(pts - np.asarray(target_vector, dtype=float), axis=1)

        # 'Effort' is a mix of how far they are (mean) and how scattered they are (std)
        effort = np.mean(dists) + np.std(dists)

        # Map logic:
        # If effort is 0 (identical points), score = 10.
        # If effort is high, score drops linearly.
        # Formula: 10 - (effort * sensitivity)
        raw_score = 10.0 - (effort * self.score_sensitivity)

        # Clamp between -1 and 10
        return max(-1.0, min(10.0, raw_score))

    def predict_point(self, vectors, mode='global'):
        """Return (center, score, constituent_vectors) for the given mode.

        mode='global': center is the mean of ALL vectors.
        mode='cluster': center is the mean of the largest agglomerative
            cluster; outliers/smaller groups are excluded from the score.
        Any other mode falls through and returns None (unchanged behavior).
        """
        vectors = np.array(vectors)

        if mode == 'global':
            # Global: The geometric center of ALL points
            center = np.mean(vectors, axis=0)
            score = self._calculate_score(center, vectors)
            return center, score, vectors  # Return all vectors as constituents

        elif mode == 'cluster':
            # Cluster: Find groups, pick the largest/densest, ignore outliers
            # Using AgglomerativeClustering is often more stable than DBSCAN for fixed-size batches
            # distance_threshold determines how far points can be to be same group
            clusterer = AgglomerativeClustering(
                n_clusters=None,
                metric='euclidean',
                linkage='ward',
                distance_threshold=15.0,  # Adjusted for 128-dim space
            )
            labels = clusterer.fit_predict(vectors)

            # Find largest cluster
            unique_labels, counts = np.unique(labels, return_counts=True)
            largest_label = unique_labels[np.argmax(counts)]

            # Filter vectors belonging to this cluster
            cluster_vectors = vectors[labels == largest_label]

            # Calculate center of just this cluster
            center = np.mean(cluster_vectors, axis=0)

            # Score is based ONLY on the vectors in the cluster (convergence of the solution)
            score = self._calculate_score(center, cluster_vectors)

            return center, score, cluster_vectors
78
 
79
# --- THE VISUALIZER ---
def generate_plot(mode='global', scenario='split'):
    """Render one scenario/mode combination and return it as a base64 PNG."""
    # 1. Synthesize 128-dimensional demo data.
    np.random.seed(99)  # Fixed seed for consistency

    if scenario == 'split':
        # Two well-separated gaussian groups plus a few uniform stray points.
        group_a = np.random.normal(0, 1.5, (15, 128))
        group_b = np.random.normal(12, 1.5, (15, 128))
        strays = np.random.uniform(-5, 15, (5, 128))
        data = np.vstack([group_a, group_b, strays])
    else:
        # A single tight clump.
        data = np.random.normal(0, 0.5, (40, 128))

    # 2. Ask the convergence system for a center, score, and the vectors used.
    system = VectorConvergenceSystem()
    center_vec, score, used_vectors = system.predict_point(data, mode)

    # 3. Project inputs AND the computed center through a single PCA fit so
    #    they all land in the same 2-D coordinate frame.
    projector = PCA(n_components=2)
    projected = projector.fit_transform(np.vstack([data, center_vec]))
    pts_2d, center_2d = projected[:-1], projected[-1]

    plt.figure(figsize=(7, 5), facecolor='#f0f0f0')
    plt.gca().set_facecolor('#ffffff')

    # Faint layer first: every input point.
    plt.scatter(pts_2d[:, 0], pts_2d[:, 1], c='gray', alpha=0.3, label='Ignored Inputs')

    # Map each constituent vector back to its row in `data` via nearest
    # distance, so the points the system actually used can be highlighted.
    used_indices = [
        int(np.argmin(np.linalg.norm(data - uv, axis=1))) for uv in used_vectors
    ]
    used_pts_2d = pts_2d[used_indices]

    # Strong layer: the constituent points.
    plt.scatter(used_pts_2d[:, 0], used_pts_2d[:, 1], c='#007acc', alpha=0.8, label='Constituent Inputs')

    # Faint spokes from the center to each used point ("gravitation").
    for px, py in used_pts_2d:
        plt.plot([center_2d[0], px], [center_2d[1], py], c='#007acc', alpha=0.1)

    # The predicted center itself, drawn on top.
    plt.scatter(center_2d[0], center_2d[1], c='#ff4444', s=150, marker='X', edgecolors='black', label='Generated Vector')

    plt.title(f"Mode: {mode.upper()} | Score: {score:.2f}/10\nScenario: {scenario}", fontsize=10)
    plt.legend(loc='best', fontsize=8)
    plt.grid(True, linestyle='--', alpha=0.3)

    # Serialize to PNG and close the figure so requests don't leak figures.
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')
151
 
152
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the demo page: split-data comparison plus a tight-data panel."""
    # Scenario 1: Split Data (Two islands)
    img_global_split = generate_plot('global', 'split')
    img_cluster_split = generate_plot('cluster', 'split')

    # Scenario 2: Tight Data (One clump)
    img_global_tight = generate_plot('global', 'tight')

    return f"""
    <html>
    <body style="font-family:sans-serif; text-align:center; background:#1a1a1a; color:#e0e0e0; padding:20px;">
        <h1 style="color:#ffffff;">Vector Convergence System</h1>
        <p>128-Dimensional Space projected to 2D</p>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; margin-bottom:20px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario A: Scattered Data (Two Clusters)</h2>
            <div style="display:flex; justify-content:center; gap:20px;">
                <div>
                    <h3 style="color:#66b3ff;">Global Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Averages everything. Result lands in the middle of nowhere (the dead zone). Low convergence score because inputs are far from result.</p>
                    <img src="data:image/png;base64,{img_global_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
                <div>
                    <h3 style="color:#ff9999;">Cluster Mode</h3>
                    <p style="font-size:0.9em; max-width:300px;">Detects the largest group. Ignores the smaller cluster and noise. High convergence score because the chosen points are tight.</p>
                    <img src="data:image/png;base64,{img_cluster_split}" style="border-radius:8px; border:1px solid #444;"/>
                </div>
            </div>
        </div>

        <br/>

        <div style="background:#2a2a2a; padding:20px; border-radius:15px; display:inline-block;">
            <h2 style="border-bottom:1px solid #444; padding-bottom:10px;">Scenario B: Converged Data</h2>
            <div>
                <h3 style="color:#99ff99;">Global Mode</h3>
                <p style="font-size:0.9em; max-width:300px;">Inputs are already unified. The system exerts minimal effort. Score is near max.</p>
                <img src="data:image/png;base64,{img_global_tight}" style="border-radius:8px; border:1px solid #444;"/>
            </div>
        </div>
    </body>
    </html>
    """