Spaces:
Sleeping
Sleeping
File size: 8,253 Bytes
7644eac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
"""
Configuration utilities for the AI Learning Path Generator.
Loads environment variables and provides configuration settings across the application.
"""
import os
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables from a .env file expected at the project root
# (two levels up from this file). Skipped entirely on Render, where
# configuration is injected directly into the process environment.
if not os.environ.get('RENDER'):
    dotenv_path = Path(__file__).resolve().parents[2] / '.env'
    if dotenv_path.is_file():
        load_dotenv(dotenv_path=dotenv_path)
        print(f"--- Successfully loaded .env from: {dotenv_path} ---")
    else:
        # Fallback to default python-dotenv behavior (searches current dir and
        # parents) — helpful if the script is run from an unexpected location.
        print(
            f"--- .env not found at {dotenv_path}, attempting default load_dotenv() search. ---")
        loaded_by_default = load_dotenv()
        if loaded_by_default:
            print(
                f"--- Successfully loaded .env from default location (e.g., {os.getcwd()}/.env or parent). ---")
        else:
            print("--- WARNING: .env file not found by explicit path or default search. Environment variables may not be set. ---")
# --- Runtime mode -----------------------------------------------------------
# When DEV_MODE is enabled, missing provider API keys do not abort startup
# (checked by the validation that runs after these assignments).
DEV_MODE = os.getenv('DEV_MODE', 'False').lower() == 'true'

# --- Provider API keys ------------------------------------------------------
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
DEEPSEEK_API_KEY = os.getenv("DEEPSEEK_API_KEY")
# OpenRouter has a free tier; fall back to its placeholder key when unset.
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "sk-or-v1-free")

# Deprecated — kept for backward compatibility but not used.
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
# Perplexity pricing (cost per 1K tokens); defaults to 0 so users can opt in.
PERPLEXITY_PROMPT_COST_PER_1K = float(os.getenv("PERPLEXITY_PROMPT_COST_PER_1K", "0"))
PERPLEXITY_COMPLETION_COST_PER_1K = float(os.getenv("PERPLEXITY_COMPLETION_COST_PER_1K", "0"))

# Default model provider: 'openai', 'openrouter', or 'deepseek'.
DEFAULT_PROVIDER = os.getenv("DEFAULT_PROVIDER", "openai").lower()
# Fail fast when the selected provider has no API key configured.
# Skipped in DEV_MODE; OpenRouter needs no check (its free tier falls back
# to the sk-or-v1-free placeholder key).
if not DEV_MODE:
    _provider_key_requirements = {
        'openai': (
            OPENAI_API_KEY,
            "OPENAI_API_KEY environment variable is required for OpenAI provider (unless DEV_MODE=true).",
        ),
        'deepseek': (
            DEEPSEEK_API_KEY,
            "DEEPSEEK_API_KEY environment variable is required for DeepSeek provider (unless DEV_MODE=true).",
        ),
    }
    _requirement = _provider_key_requirements.get(DEFAULT_PROVIDER)
    if _requirement is not None and not _requirement[0]:
        raise EnvironmentError(_requirement[1])
# --- Model configuration ----------------------------------------------------
# GPT-4o-mini: roughly 3x cheaper than GPT-3.5-turbo with better quality
# ($0.15/1M input tokens vs $0.50 for GPT-3.5).
# For the OpenRouter free tier try mistralai/mistral-7b-instruct or
# meta-llama/llama-2-70b-chat.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "gpt-4o-mini")
# Free model served by OpenRouter.
OPENROUTER_FREE_MODEL = os.getenv("OPENROUTER_FREE_MODEL", "mistralai/mistral-7b-instruct")
# Cheaper embedding model.
EMBEDDING_MODEL = os.getenv("EMBEDDING_MODEL", "text-embedding-3-small")
# Sized so a complete learning-path JSON fits in one completion.
MAX_TOKENS = int(os.getenv("MAX_TOKENS", "4096"))
TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))

# Alternative models for different use cases.
REASONING_MODEL = os.getenv("REASONING_MODEL", "gpt-4o-mini")  # complex reasoning
SIMPLE_MODEL = os.getenv("SIMPLE_MODEL", "gpt-4o-mini")  # simple tasks

# (Deprecated) Perplexity settings — retained for legacy tests, unused by the app.
PERPLEXITY_MODEL = os.getenv("PERPLEXITY_MODEL", "pplx-7b-online")

# Vector database location.
VECTOR_DB_PATH = os.getenv("VECTOR_DB_PATH", "./vector_db")

# Region settings.
DEFAULT_REGION = os.getenv("DEFAULT_REGION", "North America")
# --- LangSmith (LLM tracing & debugging) ------------------------------------
LANGCHAIN_TRACING_V2 = os.getenv("LANGCHAIN_TRACING_V2", "false").lower() == "true"
LANGCHAIN_ENDPOINT = os.getenv("LANGCHAIN_ENDPOINT", "https://api.smith.langchain.com")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.getenv("LANGCHAIN_PROJECT", "ai-learning-path-generator")

# --- Weights & Biases (metrics & experiment tracking) -----------------------
WANDB_API_KEY = os.getenv("WANDB_API_KEY")
WANDB_PROJECT = os.getenv("WANDB_PROJECT", "ai-learning-path-generator")
WANDB_ENTITY = os.getenv("WANDB_ENTITY")  # W&B username or team name
WANDB_MODE = os.getenv("WANDB_MODE", "online")  # 'online', 'offline', or 'disabled'
# ============================================
# ADVANCED RAG PIPELINE CONFIGURATION
# ============================================


def _env_bool(name: str, default: str) -> bool:
    """Read env var *name* as a boolean: True iff its value (or *default*
    when unset) equals 'true', case-insensitively."""
    return os.getenv(name, default).lower() == "true"


# Redis Configuration (Semantic Caching)
REDIS_HOST = os.getenv("REDIS_HOST", "localhost")
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))
REDIS_PASSWORD = os.getenv("REDIS_PASSWORD")  # None when unset
REDIS_DB = int(os.getenv("REDIS_DB", "0"))
REDIS_URL = os.getenv("REDIS_URL")  # Alternative: full connection URL
SEMANTIC_CACHE_TTL = int(os.getenv("SEMANTIC_CACHE_TTL", "3600"))  # seconds
# Minimum similarity for a cache hit.
SEMANTIC_CACHE_THRESHOLD = float(os.getenv("SEMANTIC_CACHE_THRESHOLD", "0.95"))
ENABLE_SEMANTIC_CACHE = _env_bool("ENABLE_SEMANTIC_CACHE", "True")

# Cohere Reranking API
COHERE_API_KEY = os.getenv("COHERE_API_KEY")
COHERE_RERANK_MODEL = os.getenv("COHERE_RERANK_MODEL", "rerank-english-v3.0")
USE_LOCAL_RERANKER = _env_bool("USE_LOCAL_RERANKER", "False")
LOCAL_RERANKER_MODEL = os.getenv(
    "LOCAL_RERANKER_MODEL", "cross-encoder/ms-marco-MiniLM-L-6-v2")

# Hybrid Search Configuration
BM25_K1 = float(os.getenv("BM25_K1", "1.5"))
BM25_B = float(os.getenv("BM25_B", "0.75"))
HYBRID_ALPHA = float(os.getenv("HYBRID_ALPHA", "0.5"))  # dense vs. sparse mix
HYBRID_TOP_K = int(os.getenv("HYBRID_TOP_K", "20"))

# Query Rewriting
QUERY_REWRITE_ENABLED = _env_bool("QUERY_REWRITE_ENABLED", "True")
QUERY_REWRITE_MODEL = os.getenv("QUERY_REWRITE_MODEL", "gpt-3.5-turbo")
QUERY_REWRITE_MAX_TOKENS = int(os.getenv("QUERY_REWRITE_MAX_TOKENS", "100"))

# Contextual Compression
CONTEXTUAL_COMPRESSION_ENABLED = _env_bool("CONTEXTUAL_COMPRESSION_ENABLED", "True")
COMPRESSION_MODEL = os.getenv("COMPRESSION_MODEL", "gpt-3.5-turbo")
COMPRESSION_MAX_TOKENS = int(os.getenv("COMPRESSION_MAX_TOKENS", "500"))

# Reranking Configuration
RERANK_TOP_K = int(os.getenv("RERANK_TOP_K", "5"))
RERANK_ENABLED = _env_bool("RERANK_ENABLED", "True")

# Web app settings
# NOTE: DEBUG deliberately accepts "true"/"1"/"t" (legacy behavior); the
# other boolean flags above accept only "true".
DEBUG = os.getenv("DEBUG", "True").lower() in ("true", "1", "t")
PORT = int(os.getenv("PORT", "5000"))
# --- Learning-path vocabularies ---------------------------------------------
# Human-readable description for each supported learning style.
LEARNING_STYLES = dict(
    visual="Learns best through images, diagrams, and spatial understanding",
    auditory="Learns best through listening and speaking",
    reading="Learns best through written materials and note-taking",
    kinesthetic="Learns best through hands-on activities and physical interaction",
)

# Description for each expertise tier.
EXPERTISE_LEVELS = dict(
    beginner="No prior knowledge in the subject",
    intermediate="Some familiarity with basic concepts",
    advanced="Solid understanding of core principles",
    expert="Deep knowledge and specialization",
)

# Weekly study-time buckets.
TIME_COMMITMENTS = dict(
    minimal="1-2 hours per week",
    moderate="3-5 hours per week",
    substantial="6-10 hours per week",
    intensive="10+ hours per week",
)

# Resource types weighted per learning style (higher = more relevant).
RESOURCE_TYPES = dict(
    video=dict(visual=5, auditory=4, reading=2, kinesthetic=3),
    article=dict(visual=3, reading=5, auditory=2, kinesthetic=1),
    book=dict(reading=5, visual=3, auditory=2, kinesthetic=1),
    interactive=dict(kinesthetic=5, visual=4, auditory=3, reading=3),
    course=dict(visual=4, auditory=4, reading=4, kinesthetic=3),
    documentation=dict(reading=5, visual=3, auditory=1, kinesthetic=1),
    podcast=dict(auditory=5, reading=2, visual=1, kinesthetic=1),
    project=dict(kinesthetic=5, visual=3, reading=3, auditory=2),
)
|