"""Central configuration: filesystem paths, secrets, model names, Qdrant client.

Importing this module has side effects:

* variables from a ``.env`` file are loaded into the process environment,
* the raw-data and vector-DB directories are created if they are missing,
* ``ValueError`` is raised when ``OPENAI_API_KEY`` is unset or empty,
* a module-level ``QdrantClient`` instance is constructed.
"""
import os
from pathlib import Path

from dotenv import load_dotenv
from qdrant_client import QdrantClient

load_dotenv()

# Paths
# resolve() makes the path absolute before walking up two levels; with a bare
# relative ``__file__`` (e.g. "config.py") ``.parent.parent`` would collapse to
# "." and the directories below would depend on the current working directory.
# Note that resolve() also follows symlinks.
BASE_DIR = Path(__file__).resolve().parent.parent
DATA_DIR = BASE_DIR / "data"
RAW_DATA_DIR = DATA_DIR / "raw"
VECTOR_DB_DIR = DATA_DIR / "qdrant_db"

# Ensure directories exist
RAW_DATA_DIR.mkdir(parents=True, exist_ok=True)
VECTOR_DB_DIR.mkdir(parents=True, exist_ok=True)

# Secrets
# Fail fast at import time: an unset or empty key is treated as missing.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("CRITICAL: OPENAI_API_KEY missing from .env file.")

# Models
EMBEDDING_MODEL = "text-embedding-3-small"
SPARSE_MODEL = "Qdrant/bm25"
LLM_MODEL = "gpt-4o-mini"
COLLECTION_NAME = "geneseek_clinical_trials"

# Shared Qdrant client, configured from the environment.
# NOTE(review): both values are None when the variables are unset, and
# VECTOR_DB_DIR is created above but not passed to the client in this file --
# confirm whether a local on-disk fallback was intended.
qdrant_client = QdrantClient(
    url=os.getenv("QDRANT_URL"),
    api_key=os.getenv("QDRANT_API_KEY"),
)