# codeAtlas — src/config.py
# feat: add core configuration and project structure (author: aghilsabu, commit 8320683)
"""CodeAtlas Configuration"""
import os
import json
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Dict
# Paths
# BASE_DIR resolves to the repository root (this file lives in src/).
BASE_DIR = Path(__file__).parent.parent
DATA_DIR = BASE_DIR / "data"
DIAGRAMS_DIR = DATA_DIR / "diagrams"
AUDIOS_DIR = DATA_DIR / "audios"
LOGS_DIR = DATA_DIR / "logs"
# Session state (API keys + model choice) persisted between runs; written
# by Config.save_to_session and read back by Config.load_from_session.
SESSION_FILE = BASE_DIR / ".session_state.json"
# Create the data directory tree at import time (idempotent).
for dir_path in [DATA_DIR, DIAGRAMS_DIR, AUDIOS_DIR, LOGS_DIR]:
    dir_path.mkdir(parents=True, exist_ok=True)
# Logging
LOG_FILE = LOGS_DIR / "codeatlas.log"
# Configure logging once, at import: console handler plus a persistent
# UTF-8 log file under data/logs/.
# NOTE(review): basicConfig at import time configures the root logger for
# the whole process — confirm no other module also calls basicConfig.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s | %(levelname)-8s | %(name)s | %(message)s",
    handlers=[
        logging.StreamHandler(),
        logging.FileHandler(LOG_FILE, encoding="utf-8"),
    ],
)
logger = logging.getLogger("codeatlas")
@dataclass
class ModelConfig:
    """Registry of supported AI models and provider-detection helpers.

    Models are stored as display-name -> API-model-ID mappings, so the UI
    can show friendly names while the clients receive raw IDs.
    """

    # Gemini models (latest first): display name -> API model ID.
    GEMINI_MODELS: Dict[str, str] = field(default_factory=lambda: {
        "Gemini 3.0 Pro": "gemini-3.0-pro",
        "Gemini 2.5 Pro": "gemini-2.5-pro",
        "Gemini 2.5 Flash": "gemini-2.5-flash",
        "Gemini 2.5 Flash Lite": "gemini-2.5-flash-lite",
        "Gemini 2.0 Flash": "gemini-2.0-flash",
        "Gemini 2.0 Flash Lite": "gemini-2.0-flash-lite",
    })
    # OpenAI models (latest first): display name -> API model ID.
    OPENAI_MODELS: Dict[str, str] = field(default_factory=lambda: {
        "GPT-5.1": "gpt-5.1",
        "GPT-5 Mini": "gpt-5-mini",
        "GPT-5 Nano": "gpt-5-nano",
    })
    # Display name used when no explicit selection resolves.
    DEFAULT_MODEL: str = "Gemini 2.5 Pro"

    @property
    def all_models(self) -> Dict[str, str]:
        """Return all models as one display-name -> model-ID mapping."""
        return {**self.GEMINI_MODELS, **self.OPENAI_MODELS}

    def is_openai_model(self, model_name: str) -> bool:
        """Check whether *model_name* refers to an OpenAI model.

        Accepts either a display name (e.g. "GPT-5.1") or a raw model ID
        (e.g. "gpt-5-mini").  The previous prefix check matched only
        lowercase raw IDs, so display names — which is what
        Config.current_model stores — were always classified as Gemini.
        """
        if model_name in self.OPENAI_MODELS:
            return True
        # Case-insensitive prefix match covers raw IDs and future models.
        return model_name.lower().startswith(("gpt-", "o1", "o3"))

    def get_model_id(self, display_name: str) -> str:
        """Map a display name to its model ID, falling back to the default."""
        return self.all_models.get(display_name, self.GEMINI_MODELS[self.DEFAULT_MODEL])
@dataclass
class ProcessingConfig:
    """Settings that decide which repository files are worth processing."""

    # Source, config, and doc extensions that are eligible for analysis.
    ALLOWED_EXTENSIONS: set = field(default_factory=lambda: {
        ".bash", ".c", ".cpp", ".cs", ".go", ".h", ".java", ".js",
        ".json", ".jsx", ".md", ".php", ".py", ".rb", ".rs", ".sh",
        ".sql", ".ts", ".tsx", ".txt", ".yaml", ".yml", ".zsh",
    })
    # Well-known files included even though they carry no extension.
    ALLOWED_FILES: set = field(default_factory=lambda: {
        ".gitignore", "Dockerfile", "LICENSE", "Makefile", "README",
    })
    # Directory names skipped outright: dependency caches, build output,
    # editor/IDE state, and test trees.
    BLOCKED_DIRS: set = field(default_factory=lambda: {
        ".env", ".git", ".gradle", ".idea", ".m2", ".next", ".venv",
        ".vscode", "Pods", "__pycache__", "__tests__", "bin", "build",
        "coverage", "cypress", "dist", "e2e", "env", "fixtures",
        "integration", "mock", "mocks", "node_modules", "obj",
        "playwright", "spec", "specs", "target", "test", "test_data",
        "testdata", "testing", "tests", "unit", "vendor", "venv",
    })
    # Individual files skipped: lockfiles, tool/linter configs, OS cruft.
    BLOCKED_PATTERNS: set = field(default_factory=lambda: {
        ".DS_Store", ".babelrc", ".coveragerc", ".eslintrc", ".prettierrc",
        "Cargo.lock", "Gemfile.lock", "babel.config.js", "composer.lock",
        "jest.config.js", "package-lock.json", "pnpm-lock.yaml",
        "poetry.lock", "pyproject.toml", "setup.cfg", "tox.ini",
        "tsconfig.json", "vite.config.js", "webpack.config.js", "yarn.lock",
    })
    # Filename fragments that mark a file as test-related.
    TEST_FILE_PATTERNS: set = field(default_factory=lambda: {
        ".spec.", ".test.", "_spec.", "_test.",
        "conftest.py", "pytest.ini", "setup.py", "test_",
    })
    # Size limits.
    MAX_FILE_SIZE: int = 50 * 1024           # per-file cap: 50 KiB
    MAX_CONTEXT_SIZE: int = 3_500_000        # total context chars (~1M tokens)
    LARGE_REPO_THRESHOLD: int = 10_000_000   # repos above 10 MB count as large
@dataclass
class Config:
    """Main configuration: API keys, model selection, paths, server settings."""

    # API keys, seeded from the environment; may be overridden by the
    # session file via load_from_session().
    gemini_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("GEMINI_API_KEY", "")
    )
    openai_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("OPENAI_API_KEY", "")
    )
    elevenlabs_api_key: Optional[str] = field(
        default_factory=lambda: os.environ.get("ELEVENLABS_API_KEY", "")
    )
    # Model configuration
    models: ModelConfig = field(default_factory=ModelConfig)
    # Processing configuration
    processing: ProcessingConfig = field(default_factory=ProcessingConfig)
    # Currently selected model (display name, see ModelConfig).
    current_model: str = "Gemini 2.5 Pro"
    # Paths
    diagrams_dir: Path = DIAGRAMS_DIR
    audios_dir: Path = AUDIOS_DIR
    session_file: Path = SESSION_FILE
    # Server settings
    server_host: str = "0.0.0.0"
    server_port: int = 7860

    def save_to_session(self) -> bool:
        """Persist API keys and model choice to the session file.

        Returns:
            True on success, False on failure (logged, never raised).
        """
        data = {
            "api_key": self.gemini_api_key,
            "openai_api_key": self.openai_api_key,
            "elevenlabs_api_key": self.elevenlabs_api_key,
            "model": self.current_model,
        }
        try:
            with open(self.session_file, "w", encoding="utf-8") as f:
                json.dump(data, f)
            # The file holds API keys in plaintext — restrict it to the
            # owner (no-op beyond read-only on Windows).
            os.chmod(self.session_file, 0o600)
            return True
        except Exception as e:
            logger.warning(f"Failed to save session: {e}")
            return False

    def load_from_session(self) -> "Config":
        """Overlay persisted session values onto this config, in place.

        Missing file or unreadable JSON is non-fatal: current values are
        kept and a warning is logged.  Returns self for chaining.
        """
        try:
            if self.session_file.exists():
                with open(self.session_file, "r", encoding="utf-8") as f:
                    data = json.load(f)
                self.gemini_api_key = data.get("api_key", self.gemini_api_key)
                self.openai_api_key = data.get("openai_api_key", self.openai_api_key)
                self.elevenlabs_api_key = data.get("elevenlabs_api_key", self.elevenlabs_api_key)
                self.current_model = data.get("model", self.current_model)
        except Exception as e:
            logger.warning(f"Failed to load session: {e}")
        return self

    def get_api_key_for_model(self, model_name: str) -> str:
        """Return the API key for *model_name*'s provider ("" if unset)."""
        if self.models.is_openai_model(model_name):
            return self.openai_api_key or ""
        return self.gemini_api_key or ""
# Global config instance (lazily created singleton).
_config: Optional[Config] = None


def get_config() -> Config:
    """Return the process-wide Config, creating and hydrating it on first use."""
    global _config
    if _config is None:
        fresh = Config()
        _config = fresh.load_from_session()
    return _config