File size: 1,273 Bytes
102dac3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
from pathlib import Path
from dotenv import load_dotenv

# Load variables from a .env file (python-dotenv) into the process environment.
# This has to run before the os.getenv() lookups further down in this module,
# because those are evaluated once, at import time.
load_dotenv()

# Filesystem layout
# Every location below is derived from the resolved location of this file.
BACKEND_DIR: Path = Path(__file__).resolve().parent
PROJECT_ROOT: Path = BACKEND_DIR.parent
DATA_DIR: Path = BACKEND_DIR.joinpath("data")
# NOTE: "assests" (sic) mirrors the spelling of the existing folder.
ASSETS_DIR: Path = BACKEND_DIR.joinpath("assests")
PERSONAL_DATA_DIR: Path = PROJECT_ROOT.joinpath("personal_data")

# Individual files and stores, grouped by the directory they live under
CHUNKS_PATH: Path = DATA_DIR.joinpath("all_chunks.json")
# TODO(review): the store version folder is hard-coded; make it configurable if needed.
FAISS_PATH: Path = DATA_DIR.joinpath("faiss_store", "v30_1000-250")
RESUME_PATH: Path = ASSETS_DIR.joinpath("KrishnaVamsiDhulipalla.pdf")
BIO_PATH: Path = PERSONAL_DATA_DIR.joinpath("bio.md")
FAILED_CHUNKS_PATH: Path = PROJECT_ROOT.joinpath("failed_chunks.txt")
UI_DIST: Path = PROJECT_ROOT.joinpath("ui", "dist")

# Environment-backed settings (credentials and the public base URL)
# The three credentials are None when the corresponding variable is unset;
# only PUBLIC_BASE_URL carries a fallback value.
_env = os.environ
PUBLIC_BASE_URL = _env.get("PUBLIC_BASE_URL", "http://localhost:8000")
OPENAI_API_KEY = _env.get("OPENAI_API_KEY")
GOOGLE_CLIENT_ID = _env.get("GOOGLE_CLIENT_ID")
GOOGLE_CLIENT_SECRET = _env.get("GOOGLE_CLIENT_SECRET")
del _env  # keep the module namespace identical to the plain-constant layout

# Model selection for embedding and re-ranking
# NOTE(review): how these are consumed is not visible in this module; the
# flag presumably switches the embedding backend to OpenAI -- confirm at the call site.
USE_OPENAI_EMBEDDING: bool = True
EMBEDDING_MODEL_NAME: str = "text-embedding-3-small"
CROSS_ENCODER_MODEL: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"

# Retrieval tuning knobs
# NOTE(review): the meanings below are inferred from the names only (RRF is
# presumably reciprocal rank fusion, MMR presumably maximal marginal
# relevance) -- confirm against the retriever implementation.
TOP_K: int = 8
K_PER_QUERY: int = 6
RERANK_TOP_N: int = 20
RRF_K: int = 60
MMR_LAMBDA: float = 0.7

# Memory store settings
# MEM_FAISS_PATH is a plain string (not a Path): it is either the value of the
# MEM_FAISS_PATH environment variable or, when unset, "memory_faiss" under DATA_DIR.
MEM_FAISS_PATH = os.environ.get(
    "MEM_FAISS_PATH",
    str(DATA_DIR.joinpath("memory_faiss")),
)
# NOTE(review): presumably the number of memory updates between autosaves -- confirm at the call site.
MEM_AUTOSAVE_EVERY: int = 20