# codeAtlas — src/config.py
# feat: add core configuration and project structure (author: aghilsabu, commit 8320683)
"""CodeAtlas Configuration"""
import os
import json
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Dict
# Paths
# BASE_DIR resolves to the repository root (this file lives in src/).
BASE_DIR = Path(__file__).parent.parent
DATA_DIR = BASE_DIR / "data"
DIAGRAMS_DIR = DATA_DIR / "diagrams"
AUDIOS_DIR = DATA_DIR / "audios"
LOGS_DIR = DATA_DIR / "logs"
# Session state (API keys + model choice) persisted between runs; written
# by Config.save_to_session and read back by Config.load_from_session.
SESSION_FILE = BASE_DIR / ".session_state.json"
# Create the data directory tree at import time (idempotent).
for dir_path in [DATA_DIR, DIAGRAMS_DIR, AUDIOS_DIR, LOGS_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)
# Logging
LOG_FILE = LOGS_DIR / "codeatlas.log"
# Configure logging once, at import: console handler plus a persistent
# UTF-8 log file under data/logs/.
# NOTE(review): basicConfig at import time configures the root logger for
# the whole process — confirm no other module also calls basicConfig.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
logger = logging.getLogger("codeatlas")
@dataclass
class ModelConfig:
    """Registry of supported AI models and provider-detection helpers.

    Models are stored as display-name -> API-model-ID mappings, so the UI
    can show friendly names while the clients receive raw IDs.
    """

    # Gemini models (latest first): display name -> API model ID.
    GEMINI_MODELS: Dict[str, str] = field(default_factory=lambda: {
        "Gemini 3.0 Pro": "gemini-3.0-pro",
        "Gemini 2.5 Pro": "gemini-2.5-pro",
        "Gemini 2.5 Flash": "gemini-2.5-flash",
        "Gemini 2.5 Flash Lite": "gemini-2.5-flash-lite",
        "Gemini 2.0 Flash": "gemini-2.0-flash",
        "Gemini 2.0 Flash Lite": "gemini-2.0-flash-lite",
    })
    # OpenAI models (latest first): display name -> API model ID.
    OPENAI_MODELS: Dict[str, str] = field(default_factory=lambda: {
        "GPT-5.1": "gpt-5.1",
        "GPT-5 Mini": "gpt-5-mini",
        "GPT-5 Nano": "gpt-5-nano",
    })
    # Display name used when no explicit selection resolves.
    DEFAULT_MODEL: str = "Gemini 2.5 Pro"

    @property
    def all_models(self) -> Dict[str, str]:
        """Return all models as one display-name -> model-ID mapping."""
        return {**self.GEMINI_MODELS, **self.OPENAI_MODELS}

    def is_openai_model(self, model_name: str) -> bool:
        """Check whether *model_name* refers to an OpenAI model.

        Accepts either a display name (e.g. "GPT-5.1") or a raw model ID
        (e.g. "gpt-5-mini").  The previous prefix check matched only
        lowercase raw IDs, so display names — which is what
        Config.current_model stores — were always classified as Gemini.
        """
        if model_name in self.OPENAI_MODELS:
            return True
        # Case-insensitive prefix match covers raw IDs and future models.
        return model_name.lower().startswith(("gpt-", "o1", "o3"))

    def get_model_id(self, display_name: str) -> str:
        """Map a display name to its model ID, falling back to the default."""
        return self.all_models.get(display_name, self.GEMINI_MODELS[self.DEFAULT_MODEL])
@dataclass
class ProcessingConfig:
    """Settings that decide which repository files are worth processing."""

    # Source, config, and doc extensions that are eligible for analysis.
    ALLOWED_EXTENSIONS: set = field(default_factory=lambda: {
        ".bash", ".c", ".cpp", ".cs", ".go", ".h", ".java", ".js",
        ".json", ".jsx", ".md", ".php", ".py", ".rb", ".rs", ".sh",
        ".sql", ".ts", ".tsx", ".txt", ".yaml", ".yml", ".zsh",
    })
    # Well-known files included even though they carry no extension.
    ALLOWED_FILES: set = field(default_factory=lambda: {
        ".gitignore", "Dockerfile", "LICENSE", "Makefile", "README",
    })
    # Directory names skipped outright: dependency caches, build output,
    # editor/IDE state, and test trees.
    BLOCKED_DIRS: set = field(default_factory=lambda: {
        ".env", ".git", ".gradle", ".idea", ".m2", ".next", ".venv",
        ".vscode", "Pods", "__pycache__", "__tests__", "bin", "build",
        "coverage", "cypress", "dist", "e2e", "env", "fixtures",
        "integration", "mock", "mocks", "node_modules", "obj",
        "playwright", "spec", "specs", "target", "test", "test_data",
        "testdata", "testing", "tests", "unit", "vendor", "venv",
    })
    # Individual files skipped: lockfiles, tool/linter configs, OS cruft.
    BLOCKED_PATTERNS: set = field(default_factory=lambda: {
        ".DS_Store", ".babelrc", ".coveragerc", ".eslintrc", ".prettierrc",
        "Cargo.lock", "Gemfile.lock", "babel.config.js", "composer.lock",
        "jest.config.js", "package-lock.json", "pnpm-lock.yaml",
        "poetry.lock", "pyproject.toml", "setup.cfg", "tox.ini",
        "tsconfig.json", "vite.config.js", "webpack.config.js", "yarn.lock",
    })
    # Filename fragments that mark a file as test-related.
    TEST_FILE_PATTERNS: set = field(default_factory=lambda: {
        ".spec.", ".test.", "_spec.", "_test.",
        "conftest.py", "pytest.ini", "setup.py", "test_",
    })
    # Size limits.
    MAX_FILE_SIZE: int = 50 * 1024           # per-file cap: 50 KiB
    MAX_CONTEXT_SIZE: int = 3_500_000        # total context chars (~1M tokens)
    LARGE_REPO_THRESHOLD: int = 10_000_000   # repos above 10 MB count as large
@dataclass
class Config:
    """Main configuration: API keys, model selection, paths, server settings."""

    # API keys, seeded from the environment; may be overridden by the
    # session file via load_from_session().
    gemini_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("GEMINI_API_KEY", "")
    )
    openai_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("OPENAI_API_KEY", "")
    )
    elevenlabs_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("ELEVENLABS_API_KEY", "")
    )
    # Model configuration
    models: ModelConfig = field(default_factory=ModelConfig)
    # Processing configuration
    processing: ProcessingConfig = field(default_factory=ProcessingConfig)
    # Currently selected model (display name, see ModelConfig).
    current_model: str = "Gemini 2.5 Pro"
    # Paths
    diagrams_dir: Path = DIAGRAMS_DIR
    audios_dir: Path = AUDIOS_DIR
    session_file: Path = SESSION_FILE
    # Server settings
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    def save_to_session(self) -> bool:
        """Persist API keys and model choice to the session file.

        Returns:
            True on success, False on failure (logged, never raised).
        """
        data = {
            "api_key": self.gemini_api_key,
            "openai_api_key": self.openai_api_key,
            "elevenlabs_api_key": self.elevenlabs_api_key,
            "model": self.current_model,
        }
        try:
            with open(self.session_file, "w", encoding="utf-8") as f:
                json.dump(data, f)
            # The file holds API keys in plaintext — restrict it to the
            # owner (no-op beyond read-only on Windows).
            os.chmod(self.session_file, 0o600)
            return True
        except Exception as e:
            logger.warning(f"Failed to save session: {e}")
            return False

    def load_from_session(self) -> "Config":
        """Overlay persisted session values onto this config, in place.

        Missing file or unreadable JSON is non-fatal: current values are
        kept and a warning is logged.  Returns self for chaining.
        """
        try:
            if self.session_file.exists():
                with open(self.session_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                self.gemini_api_key = data.get("api_key", self.gemini_api_key)
                self.openai_api_key = data.get("openai_api_key", self.openai_api_key)
                self.elevenlabs_api_key = data.get("elevenlabs_api_key", self.elevenlabs_api_key)
                self.current_model = data.get("model", self.current_model)
        except Exception as e:
            logger.warning(f"Failed to load session: {e}")
        return self

    def get_api_key_for_model(self, model_name: str) -> str:
        """Return the API key for *model_name*'s provider ("" if unset)."""
        if self.models.is_openai_model(model_name):
            return self.openai_api_key or ""
        return self.gemini_api_key or ""
# Global config instance (lazily created singleton).
_config: Optional[Config] = None


def get_config() -> Config:
    """Return the process-wide Config, creating and hydrating it on first use."""
    global _config
    if _config is None:
        fresh = Config()
        _config = fresh.load_from_session()
    return _config