|
|
import os |
|
|
from pathlib import Path |
|
|
from langchain_huggingface import HuggingFaceEmbeddings |
|
|
from langchain_openai import ChatOpenAI |
|
|
from dotenv import load_dotenv |
|
|
import logging |
|
|
from logging.handlers import RotatingFileHandler |
|
|
from pydantic_settings import BaseSettings, SettingsConfigDict |
|
|
|
|
|
|
|
|
# Load variables from .env into os.environ. pydantic-settings also reads the
# .env file itself (see Settings.model_config), so this additionally makes the
# values visible to any plain os.getenv() lookups.
load_dotenv()
|
|
|
|
|
|
|
|
|
|
|
class Settings(BaseSettings):
    """Application configuration resolved from the environment / .env file.

    pydantic-settings resolves each field from (highest priority first):
    init kwargs, process environment variables, the .env file, and finally
    the literal defaults declared below.
    """

    model_config = SettingsConfigDict(env_file='.env', env_file_encoding='utf-8', extra='ignore')

    # Required: instantiating Settings() raises a ValidationError if missing.
    OPENAI_API_KEY: str
    # Optional base-URL override for OpenAI-compatible gateways/proxies.
    OPENAI_BASE_URL: str | None = None

    # Plain literal defaults: BaseSettings already reads the corresponding
    # environment variables with higher priority than these defaults, so the
    # previous os.getenv(...) wrappers were redundant (and froze the value at
    # class-definition time for no benefit).
    LOG_LEVEL: str = "INFO"
    DATA_DIR: str = ""
    LOG_DIR: str = ""
|
|
|
|
|
|
|
|
# Single shared settings instance. Raises a pydantic ValidationError at import
# time if a required variable (e.g. OPENAI_API_KEY) cannot be resolved.
settings = Settings()
|
|
|
|
|
|
|
|
|
|
|
# Project root is two levels above this file.
PROJECT_ROOT = Path(__file__).parent.parent.absolute()

# Data layout: DATA_DIR may be overridden via settings; defaults to <root>/data.
DATA_DIR = Path(settings.DATA_DIR) if settings.DATA_DIR else Path(PROJECT_ROOT / "data")
NEW_DATA = DATA_DIR / "new_data"
CHUNKS_PATH = DATA_DIR / "chunks.pkl"
VECTOR_STORE_DIR = DATA_DIR / "vector_store"

# Create all required directories up front so later file I/O never fails
# on a missing path.
for _required_dir in (DATA_DIR, NEW_DATA, VECTOR_STORE_DIR):
    _required_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
# Log directory: LOG_DIR setting overrides the default <root>/logs.
# Reuses PROJECT_ROOT for consistency with the data-path configuration,
# instead of recomputing Path(__file__).parent.parent here.
LOG_DIR = Path(settings.LOG_DIR or (PROJECT_ROOT / "logs"))
LOG_DIR.mkdir(parents=True, exist_ok=True)

# Single rotating application log file (see handler setup below).
LOG_FILE = LOG_DIR / "app.log"
|
|
|
|
|
|
|
|
# Application-wide logger, configured exactly once (guarded by the
# handlers check so repeated imports don't duplicate handlers).
LOG_LEVEL = settings.LOG_LEVEL.upper()

logger = logging.getLogger("AgenticMedicalRAG")
logger.setLevel(LOG_LEVEL)
logger.propagate = False

if not logger.handlers:
    log_format = logging.Formatter(
        fmt="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # Rotating file output: ~1 MB per file, 3 backups kept.
    rotating_handler = RotatingFileHandler(
        LOG_FILE,
        maxBytes=1000000,
        backupCount=3,
        encoding="utf-8",
    )
    console_handler = logging.StreamHandler()

    # File handler first, then console — both share the same format.
    for _handler in (rotating_handler, console_handler):
        _handler.setFormatter(log_format)
        logger.addHandler(_handler)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Lazily-initialized ChatOpenAI singleton; populated on first get_llm() call.
_llm = None
|
|
|
|
|
def get_llm():
    """Return the shared ChatOpenAI client, constructing it on first use.

    Lazy construction keeps module import fast; every subsequent call
    returns the cached instance.

    Returns:
        ChatOpenAI: streaming gpt-4o client (temperature 0, 2048 max tokens).

    Raises:
        ValueError: if OPENAI_API_KEY is empty or missing.
        Exception: re-raises any error from ChatOpenAI construction.
    """
    global _llm

    # Fast path: already constructed.
    if _llm is not None:
        return _llm

    logger.info("Initializing LLM (first time)...")

    api_key = settings.OPENAI_API_KEY
    if not api_key:
        logger.error("OPENAI_API_KEY not found in environment variables")
        raise ValueError("OpenAI API key is required. Please set OPENAI_API_KEY environment variable.")

    try:
        client = ChatOpenAI(
            model="gpt-4o",
            api_key=api_key,
            base_url=settings.OPENAI_BASE_URL,
            temperature=0.0,
            max_tokens=2048,
            request_timeout=30,
            max_retries=2,
            streaming=True,
        )
        logger.info("LLM initialized successfully")
    except Exception as e:
        logger.error(f"Failed to initialize LLM: {e}")
        raise

    _llm = client
    return _llm
|
|
|
|
|
def create_llm():
    """Create LLM with proper error handling and fallbacks.

    Thin backwards-compatible wrapper around get_llm().
    """
    llm = get_llm()
    return llm
|
|
|
|
|
|
|
|
# NOTE(review): never reassigned in this module — presumably a legacy
# placeholder kept for backward compatibility; callers should use get_llm().
LLM = None
|
|
|
|
|
|
|
|
# Lazily-loaded HuggingFace embedding model singleton; populated on first
# get_embedding_model() call.
_embedding_model = None
|
|
|
|
|
def get_embedding_model():
    """Return the shared HuggingFace embedding model, loading it on first use.

    Lazy loading keeps startup fast; the model is cached in a module-level
    singleton after the first call.

    Returns:
        HuggingFaceEmbeddings: CPU-hosted MedEmbed model producing
        normalized embeddings.

    Raises:
        ValueError: if the model fails to load; the original exception is
        chained as the cause.
    """
    global _embedding_model
    if _embedding_model is None:
        logger.info("Loading embedding model (first time)...")
        try:
            _embedding_model = HuggingFaceEmbeddings(
                model_name="abhinand/MedEmbed-base-v0.1",
                model_kwargs={'device': 'cpu'},
                encode_kwargs={'normalize_embeddings': True}
            )
            logger.info("Embedding model loaded successfully")
        except Exception as e:
            logger.error(f"Failed to load embedding model: {e}")
            # Chain the underlying exception so the root cause isn't lost
            # (previously `raise ValueError(...)` discarded it from the chain
            # semantics; `from e` makes the causal link explicit).
            raise ValueError("Failed to load embedding model") from e
    return _embedding_model
|
|
|
|
|
|
|
|
def create_embedding_model():
    """Create embedding model with proper error handling.

    Thin backwards-compatible wrapper around get_embedding_model().
    """
    model = get_embedding_model()
    return model
|
|
|
|
|
|
|
|
# NOTE(review): never reassigned in this module — presumably a legacy
# placeholder; callers should use get_embedding_model().
EMBEDDING_MODEL = None
|
|
|
|
|
|
|
|
def validate_config():
    """Validate all required configurations.

    Checks that every required setting has a truthy value on the shared
    `settings` object.

    Raises:
        ValueError: listing the names of any missing required variables.
    """
    required = ("OPENAI_API_KEY",)
    missing_vars = [name for name in required if not getattr(settings, name, None)]

    if missing_vars:
        raise ValueError(f"Missing required environment variables: {missing_vars}")

    logger.info("Configuration validation completed")
|
|
|
|
|
|
|
|
# Fail fast: validate configuration at import time so misconfiguration
# surfaces immediately rather than on first use.
try:
    validate_config()
except Exception as e:
    logger.error(f"Configuration validation failed: {e}")
    # Bare `raise` preserves the original traceback as-is; `raise e` would
    # re-raise through this frame and mutate the exception's __traceback__.
    raise
|
|
|