Miroir committed on
Commit
cf65513
·
1 Parent(s): b3af544

fastapi setup for huggingface

Browse files
Dockerfile ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.9-slim
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+ ENV MODEL_URL="https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec"
7
+
8
+ WORKDIR /app
9
+
10
+ COPY --chown=user requirements.txt requirements.txt
11
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
12
+
13
+ COPY --chown=user . .
14
+
15
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from pydantic import BaseModel
4
+ from loguru import logger
5
+ import sys
6
+ import os
7
+
8
+ from services.word_service import WordEmbeddingService
9
+ from services.game_service import GameService
10
+ from services.visualization_service import VisualizationService
11
+
12
+ # Configure logger
13
+ logger.remove()
14
+ logger.add(sys.stdout, level="INFO")
15
+
16
+ app = FastAPI()
17
+
18
+ # Configure CORS
19
+ app.add_middleware(
20
+ CORSMiddleware,
21
+ allow_origins=["*"], # Adjust this in production
22
+ allow_credentials=True,
23
+ allow_methods=["*"],
24
+ allow_headers=["*"],
25
+ )
26
+
27
+ # Initialize services
28
+ try:
29
+ word_service = WordEmbeddingService()
30
+ game_service = GameService(word_service)
31
+ visualization_service = VisualizationService(word_service)
32
+ logger.info("Services initialized successfully")
33
+ except Exception as e:
34
+ logger.error(f"Failed to initialize services: {str(e)}")
35
+ raise e
36
+
37
+ # Pydantic models for request validation
38
+ class WordCheck(BaseModel):
39
+ word: str
40
+
41
+ class JokerUse(BaseModel):
42
+ joker_type: str
43
+
44
+ @app.get("/api/game-state")
45
+ async def get_game_state():
46
+ try:
47
+ return game_service.get_state()
48
+ except Exception as e:
49
+ logger.error(f"Error getting game state: {str(e)}")
50
+ raise HTTPException(status_code=500, detail="Internal server error")
51
+
52
+ @app.post("/api/check-word")
53
+ async def check_word(word_check: WordCheck):
54
+ try:
55
+ return game_service.check_word(word_check.word)
56
+ except Exception as e:
57
+ logger.error(f"Error checking word: {str(e)}")
58
+ raise HTTPException(status_code=500, detail="Internal server error")
59
+
60
+ @app.post("/api/use-joker")
61
+ async def use_joker(joker: JokerUse):
62
+ try:
63
+ return game_service.use_joker(joker.joker_type)
64
+ except Exception as e:
65
+ logger.error(f"Error using joker: {str(e)}")
66
+ raise HTTPException(status_code=500, detail="Internal server error")
67
+
68
+ @app.get("/api/visualization")
69
+ async def get_visualization():
70
+ try:
71
+ state = game_service.get_state()
72
+ return visualization_service.prepare_3d_visualization(
73
+ state["target_word"],
74
+ state["guessed_words"]
75
+ )
76
+ except Exception as e:
77
+ logger.error(f"Error getting visualization: {str(e)}")
78
+ raise HTTPException(status_code=500, detail="Internal server error")
79
+
80
+ @app.get("/")
81
+ async def root():
82
+ """Health check endpoint"""
83
+ return {"status": "ok", "message": "Semantix API is running"}
config/__pycache__/game_config.cpython-311.pyc ADDED
Binary file (2.77 kB). View file
 
config/game_config.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WordVerse Game Configuration
3
+ This file contains all configurable parameters for the WordVerse game.
4
+ """
5
+ from typing import Dict, Any
6
+
7
+ # Main configuration dictionary
8
+ GAME_CONFIG: Dict[str, Any] = {
9
+ # Difficulty Settings
10
+ "difficulty": {
11
+ "easy": {
12
+ "jokers_high_similarity": 3,
13
+ "jokers_medium_similarity": 3,
14
+ "words_per_joker": 5,
15
+ "similarity_threshold": 0.99, # Threshold to find the word
16
+ "time_limit": 300, # in seconds
17
+ },
18
+ "medium": {
19
+ "jokers_high_similarity": 2,
20
+ "jokers_medium_similarity": 2,
21
+ "words_per_joker": 3,
22
+ "similarity_threshold": 0.995,
23
+ "time_limit": 240,
24
+ },
25
+ "hard": {
26
+ "jokers_high_similarity": 1,
27
+ "jokers_medium_similarity": 1,
28
+ "words_per_joker": 2,
29
+ "similarity_threshold": 0.998,
30
+ "time_limit": 180,
31
+ }
32
+ },
33
+
34
+ # Joker System
35
+ "jokers": {
36
+ "similarity_ranges": {
37
+ "high": {
38
+ "min": 0.7,
39
+ "max": 0.8
40
+ },
41
+ "medium": {
42
+ "min": 0.6,
43
+ "max": 0.7
44
+ }
45
+ },
46
+ "cooldown": 3, # Number of guesses required between joker uses
47
+ },
48
+
49
+ # Scoring System
50
+ "scoring": {
51
+ "base_points": 1000,
52
+ "time_bonus": {
53
+ "enabled": True,
54
+ "points_per_second": 10,
55
+ },
56
+ "joker_penalty": {
57
+ "high_similarity": -100,
58
+ "medium_similarity": -50,
59
+ },
60
+ "streak_bonus": {
61
+ "enabled": True,
62
+ "threshold": 0.8, # Similarity threshold for streak
63
+ "multiplier": 1.5,
64
+ }
65
+ },
66
+
67
+ # Game Rules
68
+ "rules": {
69
+ "max_attempts": 0, # 0 for unlimited
70
+ "min_word_length": 3,
71
+ "show_target_word": False, # If false, target word is hidden until found
72
+ "allow_partial_matches": True,
73
+ },
74
+
75
+ # UI/UX
76
+ "interface": {
77
+ "history_size": 50, # Number of words to show in history
78
+ "visualization_auto_toggle": True, # Auto show visualization on key moments
79
+ "visualization_moments": ["word_found", "joker_used"],
80
+ "feedback_levels": ["very_cold", "cold", "warm", "hot", "very_hot"],
81
+ },
82
+
83
+ # Word Selection
84
+ "word_selection": {
85
+ "categories": ["general", "science", "nature", "technology"],
86
+ "difficulty_weights": {
87
+ "easy": {"common": 0.8, "uncommon": 0.2},
88
+ "medium": {"common": 0.5, "uncommon": 0.5},
89
+ "hard": {"common": 0.2, "uncommon": 0.8}
90
+ },
91
+ },
92
+
93
+ # Player Progression
94
+ "progression": {
95
+ "levels_enabled": True,
96
+ "xp_per_game": 100,
97
+ "level_thresholds": [0, 1000, 2500, 5000, 10000],
98
+ "rewards": {
99
+ "level_2": {"bonus_joker": "high_similarity"},
100
+ "level_3": {"bonus_time": 60},
101
+ "level_4": {"bonus_joker": "medium_similarity"},
102
+ "level_5": {"unlimited_time": True}
103
+ }
104
+ }
105
+ }
106
+
107
+ # Current active difficulty level
108
+ CURRENT_DIFFICULTY = "medium"
109
+
110
+ def get_current_config() -> Dict[str, Any]:
111
+ """Get the current game configuration based on difficulty."""
112
+ base_config = GAME_CONFIG.copy()
113
+ difficulty_config = base_config["difficulty"][CURRENT_DIFFICULTY]
114
+
115
+ # Merge difficulty-specific settings into base config
116
+ for key, value in difficulty_config.items():
117
+ if key in base_config:
118
+ base_config[key] = value
119
+
120
+ return base_config
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ numpy==1.24.3
4
+ gensim==4.3.2
5
+ python-dotenv==1.0.0
6
+ loguru==0.7.2
7
+ requests==2.31.0
8
+ scikit-learn==1.3.2
9
+ umap-learn==0.5.5
services/__pycache__/game_service.cpython-311.pyc ADDED
Binary file (10.8 kB). View file
 
services/__pycache__/model_downloader.cpython-311.pyc ADDED
Binary file (2.17 kB). View file
 
services/__pycache__/visualization_service.cpython-311.pyc ADDED
Binary file (5.49 kB). View file
 
services/__pycache__/word_service.cpython-311.pyc ADDED
Binary file (12.6 kB). View file
 
services/__pycache__/word_service.cpython-313.pyc ADDED
Binary file (1.82 kB). View file
 
services/game_service.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # file location: backend/services/game_service.py
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from loguru import logger
6
+ import random
7
+ from typing import Dict, List
8
+
9
+ class GameService:
10
+ def __init__(self, word_service):
11
+ self.data_file = Path('data/game_state.json')
12
+ self.words_file = Path('data/word_list.json')
13
+ self.word_service = word_service
14
+ self._ensure_data_file()
15
+
16
+ def _ensure_data_file(self):
17
+ """Initialize game state file if it doesn't exist."""
18
+ if not self.data_file.exists():
19
+ self.data_file.parent.mkdir(exist_ok=True)
20
+ self._save_state(self._create_initial_state())
21
+
22
+ def _create_initial_state(self) -> Dict:
23
+ """Create a new game state with default values from config."""
24
+ from config.game_config import GAME_CONFIG, CURRENT_DIFFICULTY
25
+ difficulty_config = GAME_CONFIG["difficulty"][CURRENT_DIFFICULTY]
26
+
27
+ return {
28
+ 'target_word': self._get_random_word(),
29
+ 'attempts': [],
30
+ 'word_found': False,
31
+ 'similar_words': [],
32
+ 'jokers': {
33
+ 'high_similarity': {
34
+ 'remaining': difficulty_config['jokers_high_similarity'],
35
+ 'words_per_use': difficulty_config['words_per_joker']
36
+ },
37
+ 'medium_similarity': {
38
+ 'remaining': difficulty_config['jokers_medium_similarity'],
39
+ 'words_per_use': difficulty_config['words_per_joker']
40
+ }
41
+ }
42
+ }
43
+
44
+ def reset_game(self) -> Dict:
45
+ """Reset the game with a new random word and fresh jokers."""
46
+ try:
47
+ new_state = self._create_initial_state()
48
+ self._save_state(new_state)
49
+ return new_state
50
+ except Exception:
51
+ logger.exception("Error resetting game")
52
+ raise
53
+
54
+ def use_joker(self, joker_type: str) -> Dict:
55
+ """Use a joker to get words within a specific similarity range."""
56
+ try:
57
+ logger.info(f"Using joker of type: {joker_type}")
58
+ state = self._load_state()
59
+
60
+ # Validate joker type and availability
61
+ if joker_type not in ['high_similarity', 'medium_similarity']:
62
+ logger.error(f"Invalid joker type: {joker_type}")
63
+ raise ValueError("Invalid joker type")
64
+
65
+ joker = state['jokers'][joker_type]
66
+ if joker['remaining'] <= 0:
67
+ logger.warning(f"No {joker_type} jokers remaining")
68
+ raise ValueError("No jokers remaining of this type")
69
+
70
+ # Similarity range
71
+ sim_range = {
72
+ 'high_similarity': (0.7, 0.8),
73
+ 'medium_similarity': (0.6, 0.7)
74
+ }[joker_type]
75
+
76
+ target = state['target_word']
77
+ logger.info(f"Target word: {target}, range: {sim_range}")
78
+
79
+ # Get words in range
80
+ similar_words = self.word_service.get_words_in_range(
81
+ target,
82
+ sim_range[0],
83
+ sim_range[1],
84
+ n=joker['words_per_use']
85
+ )
86
+
87
+ # Log the results
88
+ logger.info(f"Found {len(similar_words)} words using joker:")
89
+ for w in similar_words:
90
+ logger.info(f"- {w['word']} (similarity: {w['similarity']:.3f})")
91
+
92
+ # Update joker count
93
+ joker['remaining'] -= 1
94
+ self._save_state(state)
95
+
96
+ logger.info(f"Remaining {joker_type} jokers: {joker['remaining']}")
97
+
98
+ return {'joker_words': similar_words, 'jokers': state['jokers']}
99
+
100
+ except Exception:
101
+ logger.exception("Error using joker")
102
+ raise
103
+
104
+ def get_center_word_power(self, chosen_words: List[str]) -> Dict[str, float]:
105
+ """
106
+ Compute and return the “center word” based on the user’s chosen words
107
+ and the current target word.
108
+ """
109
+ try:
110
+ # Load current state to get the target word
111
+ state = self._load_state()
112
+ target_word = state['target_word']
113
+
114
+ result = self.word_service.get_center_word(chosen_words, target_word)
115
+ if not result:
116
+ logger.warning("Center word power returned no result.")
117
+ return {}
118
+
119
+ logger.info(f"Center word found: {result['word']} (sim={result['similarity']:.3f})")
120
+ return result
121
+
122
+ except Exception:
123
+ logger.exception("Error computing center word power")
124
+ return {}
125
+
126
+ def _get_random_word(self) -> str:
127
+ """Get a random word from the game's word list."""
128
+ try:
129
+ with open(self.words_file, 'r', encoding='utf-8') as f:
130
+ words_data = json.load(f)
131
+ return random.choice(words_data['words'])
132
+ except Exception:
133
+ logger.exception("Error loading word list")
134
+ return "mathématiques" # fallback word
135
+
136
+ def save_attempt(self, word: str, similarity: float) -> Dict:
137
+ """Save a word attempt and update game state."""
138
+ try:
139
+ if not word or similarity <= 0:
140
+ return self._load_state()
141
+
142
+ state = self._load_state()
143
+ state['attempts'].append({'word': word, 'similarity': similarity})
144
+
145
+ # Check if word is found (similarity > 0.99)
146
+ if similarity > 0.99:
147
+ state['word_found'] = True
148
+ # Get similar words when the target is found
149
+ state['similar_words'] = self.word_service.get_most_similar_words(
150
+ state['target_word'], n=100
151
+ )
152
+
153
+ self._save_state(state)
154
+ return state
155
+ except Exception:
156
+ logger.exception("Error saving attempt")
157
+ raise
158
+
159
+ def _save_state(self, state: Dict) -> None:
160
+ """Save game state to file."""
161
+ try:
162
+ self.data_file.parent.mkdir(exist_ok=True)
163
+ with open(self.data_file, 'w', encoding='utf-8') as f:
164
+ json.dump(state, f, ensure_ascii=False, indent=2)
165
+ except Exception:
166
+ logger.exception("Error saving game state")
167
+ raise
168
+
169
+ def _load_state(self) -> Dict:
170
+ """Load game state from file."""
171
+ try:
172
+ if not self.data_file.exists():
173
+ self._ensure_data_file()
174
+ with open(self.data_file, 'r', encoding='utf-8') as f:
175
+ return json.load(f)
176
+ except Exception:
177
+ logger.exception("Error loading game state")
178
+ raise
179
+
180
+ def get_state(self) -> Dict:
181
+ """Get current game state."""
182
+ try:
183
+ return self._load_state()
184
+ except Exception:
185
+ logger.exception("Error getting game state")
186
+ raise
187
+
188
+ def get_history(self) -> List[Dict]:
189
+ """Get history of attempts."""
190
+ try:
191
+ state = self._load_state()
192
+ return state['attempts']
193
+ except Exception:
194
+ logger.exception("Error getting history")
195
+ return []
196
+
services/model_downloader.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from loguru import logger
4
+ from pathlib import Path
5
+
6
+ def download_model(url: str, model_path: str):
7
+ """Download the model file if it doesn't exist."""
8
+ if os.path.exists(model_path):
9
+ logger.info(f"Model file already exists at {model_path}")
10
+ return
11
+
12
+ logger.info(f"Downloading model from {url}")
13
+ os.makedirs(os.path.dirname(model_path), exist_ok=True)
14
+
15
+ try:
16
+ response = requests.get(url, stream=True)
17
+ response.raise_for_status()
18
+
19
+ total_size = int(response.headers.get('content-length', 0))
20
+ block_size = 1024 # 1 KB
21
+
22
+ with open(model_path, 'wb') as f:
23
+ for data in response.iter_content(block_size):
24
+ f.write(data)
25
+
26
+ logger.info(f"Model downloaded successfully to {model_path}")
27
+ except Exception as e:
28
+ logger.error(f"Error downloading model: {str(e)}")
29
+ raise
services/visualization_service.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # file location: backend/services/visualization_service.py
2
+
3
+ import numpy as np
4
+ import umap # pip install umap-learn
5
+ from loguru import logger
6
+
7
+ class VisualizationService:
8
+ def __init__(self, word_service):
9
+ self.word_service = word_service
10
+
11
+ def _compute_color(self, similarity: float) -> str:
12
+ """
13
+ Given a similarity in [0,1], return an RGB color from blue (0) to red (1).
14
+ """
15
+ # Clamp similarity to [0,1] just in case
16
+ sim = max(0.0, min(1.0, similarity))
17
+ # Simple gradient from blue (0,0,255) to red (255,0,0)
18
+ r = int(sim * 255)
19
+ g = 0
20
+ b = int((1.0 - sim) * 255)
21
+ return f"rgb({r}, {g}, {b})"
22
+
23
+ def prepare_3d_visualization(self, target_word: str, guessed_words: list[str]):
24
+ try:
25
+ embeddings = []
26
+ valid_words = []
27
+
28
+ target_embedding = self.word_service.get_vector(target_word)
29
+ if target_embedding is None:
30
+ return [{
31
+ 'word': "???",
32
+ 'coordinates': [0, 0, 0],
33
+ 'is_target': True,
34
+ 'similarity': 1.0,
35
+ 'color': 'rgb(255, 0, 0)'
36
+ }]
37
+
38
+ embeddings.append(target_embedding)
39
+ valid_words.append(target_word)
40
+
41
+ for word in guessed_words:
42
+ vec = self.word_service.get_vector(word)
43
+ if vec is not None and not np.all(vec == 0):
44
+ embeddings.append(vec)
45
+ valid_words.append(word)
46
+
47
+ # if there's only 1 or 2 embeddings total, no manifold can form
48
+ if len(embeddings) < 3:
49
+ return self._simple_fallback(target_word, valid_words, embeddings)
50
+
51
+ # Otherwise, do UMAP
52
+ embeddings_array = np.array(embeddings)
53
+ neighbors = min(5, len(embeddings) - 1)
54
+
55
+ import umap
56
+ reducer = umap.UMAP(
57
+ n_components=3,
58
+ n_neighbors=neighbors,
59
+ min_dist=0.1,
60
+ metric='cosine',
61
+ random_state=42
62
+ )
63
+ embedding_3d = reducer.fit_transform(embeddings_array)
64
+
65
+ # Re-center target at (0,0,0)
66
+ target_coords = embedding_3d[0]
67
+ embedding_3d -= target_coords
68
+
69
+ result = []
70
+ for i, word in enumerate(valid_words):
71
+ if i == 0:
72
+ # target
73
+ result.append({
74
+ 'word': "???",
75
+ 'coordinates': embedding_3d[i].tolist(),
76
+ 'is_target': True,
77
+ 'similarity': 1.0,
78
+ 'color': 'rgb(255, 0, 0)'
79
+ })
80
+ else:
81
+ sim = self.word_service.calculate_similarity(target_word, word)
82
+ color = self._compute_color(sim)
83
+ result.append({
84
+ 'word': word,
85
+ 'coordinates': embedding_3d[i].tolist(),
86
+ 'is_target': False,
87
+ 'similarity': sim,
88
+ 'color': color
89
+ })
90
+ return result
91
+
92
+ except Exception:
93
+ logger.exception("Error preparing 3D visualization with UMAP")
94
+ return [{
95
+ 'word': "???",
96
+ 'coordinates': [0, 0, 0],
97
+ 'is_target': True,
98
+ 'similarity': 1.0,
99
+ 'color': 'rgb(255, 0, 0)'
100
+ }]
101
+
102
+ def _simple_fallback(self, target_word: str, valid_words: list[str], embeddings: list[np.ndarray]):
103
+ """
104
+ Return a minimal 3D layout without UMAP
105
+ when the dataset is too small to form a manifold.
106
+ """
107
+ # If there's only the target, just place it at the origin.
108
+ if len(embeddings) <= 1:
109
+ return [{
110
+ 'word': "???",
111
+ 'coordinates': [0, 0, 0],
112
+ 'is_target': True,
113
+ 'similarity': 1.0,
114
+ 'color': 'rgb(255, 0, 0)'
115
+ }]
116
+
117
+ # We have at least 2 points (target + 1 guess)
118
+ coords = np.random.randn(len(embeddings), 3) * 0.1
119
+ coords[0] = [0, 0, 0] # target at origin
120
+
121
+ result = []
122
+ for i, word in enumerate(valid_words):
123
+ if i == 0:
124
+ # target
125
+ result.append({
126
+ 'word': "???",
127
+ 'coordinates': coords[i].tolist(),
128
+ 'is_target': True,
129
+ 'similarity': 1.0,
130
+ 'color': 'rgb(255, 0, 0)'
131
+ })
132
+ else:
133
+ sim = self.word_service.calculate_similarity(target_word, word)
134
+ color = self._compute_color(sim)
135
+ result.append({
136
+ 'word': word,
137
+ 'coordinates': coords[i].tolist(),
138
+ 'is_target': False,
139
+ 'similarity': sim,
140
+ 'color': color
141
+ })
142
+
143
+ return result
services/word_service.py ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from loguru import logger
2
+ import numpy as np
3
+ from typing import List, Dict
4
+ import random
5
+ from gensim.models import KeyedVectors
6
+ import os
7
+ import tempfile
8
+ import requests
9
+
10
+ class WordEmbeddingService:
11
+ _instance = None
12
+ _model = None
13
+
14
+ def __new__(cls):
15
+ if cls._instance is None:
16
+ cls._instance = super(WordEmbeddingService, cls).__new__(cls)
17
+ return cls._instance
18
+
19
+ def __init__(self):
20
+ if not WordEmbeddingService._model:
21
+ self._initialize_model()
22
+
23
+ def _initialize_model(self):
24
+ """Initialize the model only when needed"""
25
+ try:
26
+ # Get model URL from environment variable
27
+ model_url = os.getenv('MODEL_URL', 'https://huggingface.co/Miroir/cc.fr.300.reduced/resolve/main/cc.fr.300.reduced.vec')
28
+
29
+ logger.info("Loading FastText embeddings from URL...")
30
+
31
+ # Create a temporary file to store the model
32
+ with tempfile.NamedTemporaryFile(delete=False) as temp_file:
33
+ # Download the file
34
+ response = requests.get(model_url, stream=True)
35
+ response.raise_for_status()
36
+
37
+ # Write the content to the temporary file
38
+ for chunk in response.iter_content(chunk_size=8192):
39
+ if chunk:
40
+ temp_file.write(chunk)
41
+
42
+ temp_file.flush()
43
+
44
+ # Load the model from the temporary file
45
+ WordEmbeddingService._model = KeyedVectors.load_word2vec_format(temp_file.name)
46
+
47
+ # Build vocabulary vectors
48
+ self.vocab_vectors = {
49
+ word: WordEmbeddingService._model[word]
50
+ for word in WordEmbeddingService._model.index_to_key
51
+ }
52
+
53
+ logger.info(f"FastText model loaded successfully with "
54
+ f"{len(self.vocab_vectors)} words in the vocabulary.")
55
+
56
+ except Exception as e:
57
+ logger.exception(f"Failed to load FastText model: {str(e)}")
58
+ raise
59
+
60
+ def _ensure_model_loaded(self):
61
+ """Ensure the model is loaded before any operation"""
62
+ if not WordEmbeddingService._model:
63
+ self._initialize_model()
64
+
65
+ def calculate_similarity(self, word1: str, word2: str) -> float:
66
+ self._ensure_model_loaded()
67
+ try:
68
+ w1, w2 = word1.lower(), word2.lower()
69
+ if w1 not in WordEmbeddingService._model or w2 not in WordEmbeddingService._model:
70
+ logger.warning(f"One or both words not in FastText vocab: '{word1}', '{word2}'")
71
+ return 0.0
72
+ return float(WordEmbeddingService._model.similarity(w1, w2))
73
+ except Exception:
74
+ logger.exception(f"Error calculating similarity between '{word1}' and '{word2}'")
75
+ return 0.0
76
+
77
+
78
+ def get_vector(self, word: str) -> np.ndarray:
79
+ """
80
+ Retrieve the vector representation of a word.
81
+ Returns None if the word is not found in the FastText vocabulary.
82
+ """
83
+ try:
84
+ w = word.lower()
85
+ if w not in self.model:
86
+ logger.warning(f"No vector found for word: {word}")
87
+ return None
88
+ return self.model[w]
89
+ except Exception:
90
+ logger.exception(f"Error getting vector for word: {word}")
91
+ return None
92
+
93
+ def get_most_similar_words(self, target_word: str, n: int = 100) -> List[Dict[str, float]]:
94
+ """
95
+ Return the `n` most similar words to `target_word`.
96
+ An empty list is returned if `target_word` is out of vocabulary.
97
+ """
98
+ try:
99
+ w = target_word.lower()
100
+ if w not in self.model:
101
+ logger.warning(f"Target word not found in vocab: {target_word}")
102
+ return []
103
+ similar = self.model.most_similar(w, topn=n)
104
+ return [{'word': word, 'similarity': float(sim)} for word, sim in similar]
105
+ except Exception:
106
+ logger.exception(f"Error finding similar words for: {target_word}")
107
+ return []
108
+
109
+ def get_words_in_range(self, target_word: str, min_similarity: float,
110
+ max_similarity: float, n: int = 5) -> List[Dict[str, float]]:
111
+ """
112
+ Retrieve up to `n` words whose similarity to `target_word`
113
+ lies within [min_similarity, max_similarity].
114
+ The results are randomly sampled from all words meeting the criterion.
115
+ """
116
+ try:
117
+ logger.info(f"Finding words for '{target_word}' in range "
118
+ f"[{min_similarity}, {max_similarity}]")
119
+ target_vec = self.get_vector(target_word)
120
+ if target_vec is None:
121
+ logger.warning(f"No vector for target word: {target_word}")
122
+ return []
123
+
124
+ similarities = []
125
+ norm_target = np.linalg.norm(target_vec)
126
+
127
+ # Sample from vocabulary to improve performance
128
+ sample_size = min(100000, len(self.vocab_vectors))
129
+ sampled_words = random.sample(list(self.vocab_vectors.keys()), sample_size)
130
+
131
+ for vocab_word in sampled_words:
132
+ if vocab_word == target_word.lower():
133
+ continue
134
+
135
+ vector = self.vocab_vectors[vocab_word]
136
+ sim = float(np.dot(vector, target_vec) /
137
+ (np.linalg.norm(vector) * norm_target))
138
+
139
+ if min_similarity <= sim <= max_similarity:
140
+ similarities.append({'word': vocab_word, 'similarity': sim})
141
+
142
+ logger.info(f"Found {len(similarities)} words in the range.")
143
+ if not similarities:
144
+ return []
145
+
146
+ similarities.sort(key=lambda x: x['similarity'], reverse=True)
147
+ selected_words = random.sample(similarities, min(n, len(similarities)))
148
+
149
+ for w in selected_words:
150
+ logger.debug(f"Selected: {w['word']} (sim={w['similarity']:.3f})")
151
+ return selected_words
152
+
153
+ except Exception:
154
+ logger.exception(f"Error finding words in range for: {target_word}")
155
+ return []
156
+
157
+ def get_center_word(self, chosen_words: List[str], target_word: str) -> Dict[str, float]:
158
+ """
159
+ Compute the centroid of (chosen_words + target_word) vectors,
160
+ then find the single word in the vocabulary whose vector is closest
161
+ to that centroid (in cosine similarity).
162
+ """
163
+ if not chosen_words:
164
+ logger.warning("No chosen words provided.")
165
+ return {}
166
+
167
+ vectors = []
168
+ for w in chosen_words:
169
+ vec = self.get_vector(w)
170
+ if vec is not None:
171
+ vectors.append(vec)
172
+
173
+ target_vec = self.get_vector(target_word)
174
+ if target_vec is not None:
175
+ vectors.append(target_vec)
176
+
177
+ if not vectors:
178
+ logger.warning("No valid vectors found among chosen or target words.")
179
+ return {}
180
+
181
+ centroid = np.mean(vectors, axis=0)
182
+ best_word = None
183
+ best_similarity = -1.0
184
+
185
+ # Sample from vocabulary to improve performance
186
+ sample_size = min(100000, len(self.vocab_vectors))
187
+ sampled_words = random.sample(list(self.vocab_vectors.keys()), sample_size)
188
+
189
+ for vocab_word in sampled_words:
190
+ if vocab_word == target_word.lower() or vocab_word in [cw.lower() for cw in chosen_words]:
191
+ continue
192
+
193
+ vector = self.vocab_vectors[vocab_word]
194
+ sim = float(np.dot(vector, centroid) /
195
+ (np.linalg.norm(vector) * np.linalg.norm(centroid)))
196
+
197
+ if sim > best_similarity:
198
+ best_similarity = sim
199
+ best_word = vocab_word
200
+
201
+ if best_word is None:
202
+ logger.warning("Could not find a center word.")
203
+ return {}
204
+
205
+ return {"word": best_word, "similarity": best_similarity}