"""Central configuration constants for the compar:IA conversation explorer.

All values are plain module-level constants. Environment variables provide
runtime overrides where noted (SAMPLE_SIZE, TOPIC_MAP_MAX_DISPLAY,
OPENROUTER_API_KEY, SHOW_CONVERSATIONS).
"""
import os

# --- Paths ------------------------------------------------------------------
# Directory containing this config file; all data paths are relative to it.
PROJECT_DIR = os.path.dirname(os.path.abspath(__file__))
LANCEDB_DIR = os.path.join(PROJECT_DIR, "data", "lancedb")

# --- Hugging Face datasets --------------------------------------------------
HF_DATASET_NAME = "ministere-culture/comparia-conversations"
HF_VOTES_DATASET = "ministere-culture/comparia-votes"
HF_REACTIONS_DATASET = "ministere-culture/comparia-reactions"
# Rows to sample from the dataset; override with the SAMPLE_SIZE env var.
SAMPLE_SIZE = int(os.environ.get("SAMPLE_SIZE", "1000"))

# --- LanceDB table names ----------------------------------------------------
TABLE_NAME = "comparia_conversations"
TABLE_NAME_VOTES = "comparia_votes"
TABLE_NAME_REACTIONS = "comparia_reactions"

# --- Embedding via OpenRouter API -------------------------------------------
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
EMBEDDING_MODEL = "openai/text-embedding-3-small"
# NOTE(review): 384 is the output size of the local multilingual-e5-small
# model, but openai/text-embedding-3-small natively emits 1536 dimensions —
# confirm which embedding path is actually in use (or that the API call
# requests truncated 384-dim vectors).
EMBEDDING_DIMENSIONS = 384  # multilingual-e5-small output dims (local model)

# --- Search defaults --------------------------------------------------------
DEFAULT_SEARCH_LIMIT = 20

# --- Topic map --------------------------------------------------------------
TOPIC_NUM_CLUSTERS = 15
# Max points rendered on the topic map; override via env var for big screens.
TOPIC_MAP_MAX_DISPLAY = int(os.environ.get("TOPIC_MAP_MAX_DISPLAY", "2000"))
SEARCH_VECTOR_DIMS = 384  # Truncation for search/UMAP (multilingual-e5-small outputs 384D)
EMBED_CACHE_SIZE = 256  # LRU cache slots for query embeddings
TOPIC_LABEL_MODEL = "mistralai/mistral-small-3.1-24b-instruct"
TOPIC_MAP_CACHE = os.path.join(PROJECT_DIR, "data", "topic_map_cache.pkl")

# Privacy: set to "false" to hide individual conversation messages in the UI
SHOW_CONVERSATIONS = os.environ.get("SHOW_CONVERSATIONS", "true").lower() == "true"