Spaces:
Sleeping
Sleeping
| # file location: semantix-api/services/visualization_service.py | |
| import numpy as np | |
| import umap # pip install umap-learn | |
| from loguru import logger | |
class VisualizationService:
    """Builds 3D point layouts for a target word and its guessed words.

    Uses UMAP to project word embeddings into 3D when enough valid vectors
    are available; otherwise falls back to a trivial layout with the target
    at the origin.
    """

    def __init__(self, word_service):
        # word_service must provide get_vector(word) -> np.ndarray | None
        # and calculate_similarity(word_a, word_b) -> float.
        self.word_service = word_service

    def _compute_color(self, similarity: float) -> str:
        """Map a similarity in [0, 1] to an 'rgb(r, g, b)' string.

        Gradient runs from blue (similarity 0) to red (similarity 1).
        Out-of-range inputs are clamped into [0, 1].
        """
        sim = max(0.0, min(1.0, similarity))
        r = int(sim * 255)
        g = 0
        b = int((1.0 - sim) * 255)
        return f"rgb({r}, {g}, {b})"

    def _make_point(self, word: str, coordinates: list, *, is_target: bool,
                    similarity: float = 1.0) -> dict:
        """Build one point entry of the visualization payload.

        The target word is masked as '???' and always drawn red, so the
        answer cannot be read out of the visualization data.
        """
        if is_target:
            return {
                'word': "???",
                'coordinates': coordinates,
                'is_target': True,
                'similarity': 1.0,
                'color': 'rgb(255, 0, 0)',
            }
        return {
            'word': word,
            'coordinates': coordinates,
            'is_target': False,
            'similarity': similarity,
            'color': self._compute_color(similarity),
        }

    def prepare_3d_visualization(self, target_word: str, guessed_words: list[str]):
        """Return a list of point dicts (word, coordinates, is_target,
        similarity, color) for the target word and all valid guesses.

        Runs UMAP when at least 5 valid vectors are available; on any
        failure, or with fewer points, degrades to `_simple_fallback`.
        """
        try:
            target_embedding = self.word_service.get_vector(target_word)
            if target_embedding is None:
                # No anchor vector -> nothing meaningful to lay out.
                return self._simple_fallback(target_word, [], [])

            embeddings = [target_embedding]
            valid_words = [target_word]
            for word in guessed_words:
                vec = self.word_service.get_vector(word)
                # Skip unknown words and all-zero vectors (degenerate for
                # the cosine metric used below).
                if vec is not None and not np.all(vec == 0):
                    embeddings.append(vec)
                    valid_words.append(word)

            # UMAP needs a minimum number of points to form a manifold.
            if len(embeddings) < 5:
                return self._simple_fallback(target_word, valid_words, embeddings)

            embeddings_array = np.array(embeddings)
            # UMAP requires n_neighbors < n_samples and
            # n_components < n_samples; cap both for small datasets.
            n_neighbors = min(3, len(embeddings) - 1)
            n_components = min(3, len(embeddings) - 1)
            reducer = umap.UMAP(
                n_components=n_components,
                n_neighbors=n_neighbors,
                min_dist=0.1,
                metric='cosine',
                random_state=42,   # deterministic layout between calls
                low_memory=True,   # friendlier to constrained hosts
                n_epochs=None,     # let UMAP pick based on dataset size
                init='random',     # spectral init can fail on tiny datasets
            )
            try:
                embedding_3d = reducer.fit_transform(embeddings_array)
                # Defensive: if the projection has fewer than 3 dims,
                # pad with zero columns so every point is 3D.
                if embedding_3d.shape[1] < 3:
                    padding = np.zeros((embedding_3d.shape[0], 3 - embedding_3d.shape[1]))
                    embedding_3d = np.hstack([embedding_3d, padding])
            except (ValueError, TypeError) as e:
                logger.warning(f"UMAP failed: {str(e)}, falling back to simple visualization")
                return self._simple_fallback(target_word, valid_words, embeddings)

            # Center on the target.  Copy the anchor row first:
            # `embedding_3d -= embedding_3d[0]` would subtract a view of the
            # array from itself (in-place broadcasting over aliased memory),
            # which is fragile across numpy versions.
            embedding_3d = embedding_3d - embedding_3d[0].copy()
            max_dist = np.max(np.abs(embedding_3d))
            if max_dist > 0:
                embedding_3d *= (1.0 / max_dist)  # scale into [-1, 1]

            result = []
            for i, word in enumerate(valid_words):
                if i == 0:
                    result.append(self._make_point(
                        word, embedding_3d[i].tolist(), is_target=True))
                else:
                    sim = self.word_service.calculate_similarity(target_word, word)
                    result.append(self._make_point(
                        word, embedding_3d[i].tolist(),
                        is_target=False, similarity=sim))
            return result
        except Exception as e:
            # Last-resort guard: a broken visualization must never break
            # the game loop, so degrade to the minimal layout.
            logger.exception(f"Error in visualization: {str(e)}")
            return self._simple_fallback(target_word, [], [])

    def _simple_fallback(self, target_word: str, valid_words: list[str],
                         embeddings: list[np.ndarray]):
        """Minimal 3D layout used when the dataset is too small for UMAP
        (or UMAP failed): target at the origin, guesses jittered nearby.
        """
        # Only the target (or nothing at all): one point at the origin.
        if len(embeddings) <= 1:
            return [self._make_point(target_word, [0, 0, 0], is_target=True)]

        # NOTE(review): coordinates are freshly random on every call, so
        # the fallback layout is not reproducible between requests --
        # presumably acceptable for a degraded view; confirm with frontend.
        coords = np.random.randn(len(embeddings), 3) * 0.1
        coords[0] = [0, 0, 0]  # target anchored at the origin

        result = []
        for i, word in enumerate(valid_words):
            if i == 0:
                result.append(self._make_point(
                    word, coords[i].tolist(), is_target=True))
            else:
                sim = self.word_service.calculate_similarity(target_word, word)
                result.append(self._make_point(
                    word, coords[i].tolist(), is_target=False, similarity=sim))
        return result