File size: 1,442 Bytes
cdb73a8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""Centralized configuration for the book recommender application."""

import os
from pathlib import Path

from dotenv import load_dotenv

# Load variables from a .env file (if python-dotenv finds one) into the
# process environment before any setting in this module is read.
load_dotenv()

# Project root: four directory levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent.parent.parent

# Top-level data directory, created at import time when it is missing.
# NOTE(review): only DATA_DIR itself is created here; the "raw" and
# "processed" subdirectories below are not.
DATA_DIR = BASE_DIR.joinpath("data")
DATA_DIR.mkdir(exist_ok=True, parents=True)

# Subdirectories for input data and derived artifacts.
RAW_DATA_DIR = DATA_DIR.joinpath("raw")
PROCESSED_DATA_DIR = DATA_DIR.joinpath("processed")

# Individual file locations inside the directories above.
RAW_DATA_PATH = RAW_DATA_DIR.joinpath("books_prepared.csv")
PROCESSED_DATA_PATH = PROCESSED_DATA_DIR.joinpath("books_cleaned.parquet")
EMBEDDINGS_PATH = PROCESSED_DATA_DIR.joinpath("book_embeddings.npy")
EMBEDDING_METADATA_PATH = PROCESSED_DATA_DIR.joinpath("embedding_metadata.json")
CLUSTERS_CACHE_PATH = PROCESSED_DATA_DIR.joinpath("cluster_cache.pkl")

def _env_str(name: str, default: str) -> str:
    """Return environment variable *name*, or *default* if unset or blank.

    A variable that is present but empty (for example ``NAME=`` in a .env
    file) bypasses the default of ``os.getenv``; treating blank values as
    unset keeps an empty string from being used as a setting. Surrounding
    whitespace is stripped.
    """
    value = os.getenv(name, "").strip()
    return value or default


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an integer.

    Falls back to *default* when the variable is unset or blank.

    Raises:
        ValueError: If the variable is set to a non-integer value. The
            message names the offending variable.
    """
    raw = os.getenv(name, "").strip()
    if not raw:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


# Embedding model name; override with the EMBEDDING_MODEL environment variable.
EMBEDDING_MODEL = _env_str("EMBEDDING_MODEL", "all-MiniLM-L6-v2")

# NOTE(review): fixed constant, not derived from EMBEDDING_MODEL. It is
# presumably the vector size of the default model; confirm it still matches
# whenever EMBEDDING_MODEL is overridden.
EMBEDDING_DIMENSION = 384

# Device identifier; override with the EMBEDDING_DEVICE environment variable.
EMBEDDING_DEVICE = _env_str("EMBEDDING_DEVICE", "cpu")

DEFAULT_BATCH_SIZE = 64

DEFAULT_TOP_K = 10
MIN_SIMILARITY_THRESHOLD = 0.3
# Cluster count; override with the NUM_CLUSTERS environment variable.
NUM_CLUSTERS = _env_int("NUM_CLUSTERS", 50)


APP_VERSION = "0.1.0"
FALLBACK_COVER_URL = "https://placehold.co/200x300/667eea/white?text=No+Cover"


# --- Data/Model Versioning (Future Consideration)
# For production systems, consider implementing a robust data and model
# versioning system (e.g., DVC - Data Version Control) to track changes
# to processed data and generated embeddings. For this MVP, manual
# management or timestamping of files is suggested if versioning is critical.