"""Centralized configuration for the Naexya Docs AI application.

Provider metadata, persona prompt templates, specification categories, and
export rendering configuration all live in this one module so that other
modules such as ``ai_client.py`` and ``app.py`` read the same values.

The dictionaries are deliberately spelled out in full and commented so that a
contributor can understand every field without consulting API documentation.
"""

from __future__ import annotations

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, Optional

try:
    # python-dotenv is a convenience for local development only.
    from dotenv import load_dotenv
except ImportError:  # pragma: no cover - dependency may be missing in some envs.

    def load_dotenv(*_args: object, **_kwargs: object) -> bool:
        """Stand-in used when python-dotenv is not installed; always ``False``."""
        return False


# ---------------------------------------------------------------------------
# AI Provider configuration
# ---------------------------------------------------------------------------
# ``AI_PROVIDERS`` maps a provider identifier to the metadata stored for that
# large language model service. Every entry carries the same keys:
#
#   display_name      human-readable label
#   base_url          root of the provider's HTTP API
#   chat_endpoint     full URL used for chat/generation requests
#   default_model     model identifier used when none is chosen
#   available_models  list of selectable model identifiers
#   headers           HTTP header templates; ``{api_key}`` is a placeholder
#   default_params    default generation parameters
#   rate_limits       guidance values for UI messaging and back-off
#
# New providers can be added by following the same schema.
AI_PROVIDERS: Dict[str, Dict[str, Any]] = {
    "openai": {
        "display_name": "OpenAI",
        # Root endpoint; modules append provider-specific paths as needed.
        "base_url": "https://api.openai.com/v1",
        "chat_endpoint": "https://api.openai.com/v1/chat/completions",
        "default_model": "gpt-5",
        "available_models": ["gpt-5"],
        # Bearer token carried in the ``Authorization`` header.
        "headers": {
            "Authorization": "Bearer {api_key}",
            "Content-Type": "application/json",
        },
        # Conservative defaults balancing quality against latency and cost.
        "default_params": {
            "temperature": 0.7,
            "max_tokens": 2048,
        },
        # Rate-limit guidance for UI messaging and back-off strategies.
        "rate_limits": {
            "requests_per_minute": 500,
            "tokens_per_minute": 600_000,
        },
    },
    "anthropic": {
        "display_name": "Anthropic",
        "base_url": "https://api.anthropic.com/v1",
        "chat_endpoint": "https://api.anthropic.com/v1/messages",
        "default_model": "claude-4-sonnet",
        "available_models": ["claude-4-sonnet"],
        # Both ``x-api-key`` and ``anthropic-version`` headers are sent.
        "headers": {
            "x-api-key": "{api_key}",
            "anthropic-version": "2023-06-01",
            "Content-Type": "application/json",
        },
        "default_params": {
            "temperature": 0.7,
            "max_tokens": 2048,
        },
        "rate_limits": {
            "requests_per_minute": 400,
            "tokens_per_minute": 480_000,
        },
    },
    "google": {
        "display_name": "Google",
        "base_url": "https://generativelanguage.googleapis.com/v1",
        "chat_endpoint": "https://generativelanguage.googleapis.com/v1/models/gemini-2.5-pro:generateContent",
        "default_model": "gemini-2.5-pro",
        "available_models": ["gemini-2.5-pro"],
        # The API key travels as a query parameter, so no auth header here.
        "headers": {
            "Content-Type": "application/json",
        },
        # Note the different key name: ``max_output_tokens``.
        "default_params": {
            "temperature": 0.7,
            "max_output_tokens": 2048,
        },
        "rate_limits": {
            "requests_per_minute": 300,
            "tokens_per_minute": 360_000,
        },
    },
    "xai": {
        "display_name": "xAI",
        "base_url": "https://api.x.ai/v1",
        "chat_endpoint": "https://api.x.ai/v1/chat/completions",
        "default_model": "grok-4-fast",
        "available_models": ["grok-4-fast"],
        "headers": {
            "Authorization": "Bearer {api_key}",
            "Content-Type": "application/json",
        },
        "default_params": {
            "temperature": 0.7,
            "max_tokens": 2048,
        },
        "rate_limits": {
            "requests_per_minute": 200,
            "tokens_per_minute": 240_000,
        },
    },
    "moonshot": {
        "display_name": "Moonshot",
        "base_url": "https://api.moonshot.ai/v1",
        "chat_endpoint": "https://api.moonshot.ai/v1/chat/completions",
        "default_model": "kimi-k2",
        "available_models": ["kimi-k2"],
        "headers": {
            "Authorization": "Bearer {api_key}",
            "Content-Type": "application/json",
        },
        "default_params": {
            "temperature": 0.7,
            "max_tokens": 2048,
        },
        "rate_limits": {
            "requests_per_minute": 150,
            "tokens_per_minute": 180_000,
        },
    },
    "qwen": {
        "display_name": "Qwen",
        "base_url": "https://dashscope.aliyuncs.com/api/v1",
        "chat_endpoint": "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation",
        "default_model": "qwen3-next",
        "available_models": ["qwen3-next"],
        "headers": {
            "Authorization": "Bearer {api_key}",
            "Content-Type": "application/json",
        },
        "default_params": {
            "temperature": 0.7,
            "max_tokens": 2048,
        },
        "rate_limits": {
            "requests_per_minute": 250,
            "tokens_per_minute": 300_000,
        },
    },
}


# ---------------------------------------------------------------------------
# Persona configuration
# ---------------------------------------------------------------------------
# Personas determine how AI assistants respond to users. Rich, descriptive
# prompts keep conversations on-topic and make the extracted specifications
# actionable.
# System prompt for the business-analysis persona, one sentence per fragment.
_REQUIREMENTS_SPECIALIST_PROMPT = (
    "You are an expert business analyst specializing in gathering and "
    "documenting software requirements. "
    "Focus on user stories, business features, workflows, and functional "
    "requirements. "
    "Always ask clarifying questions and provide structured output."
)

# System prompt for the architecture persona, one sentence per fragment.
_TECHNICAL_ARCHITECT_PROMPT = (
    "You are a senior technical architect specializing in system design "
    "and implementation. "
    "Focus on API specifications, database schemas, system architecture, "
    "and technical implementation details. "
    "Provide detailed technical specifications."
)

# Maps a persona identifier to its display label and prompt text.
AI_PERSONAS: Dict[str, Dict[str, str]] = {
    "requirements_specialist": {
        "display_name": "Requirements Specialist",
        "prompt": _REQUIREMENTS_SPECIALIST_PROMPT,
    },
    "technical_architect": {
        "display_name": "Technical Architect",
        "prompt": _TECHNICAL_ARCHITECT_PROMPT,
    },
}


# ---------------------------------------------------------------------------
# Specification taxonomy
# ---------------------------------------------------------------------------
# ``SPECIFICATION_TYPES`` lists the categories shown in the UI when reviewing
# and exporting specifications.
SPECIFICATION_TYPES = [
    "User Stories",
    "Features",
    "API Endpoints",
    "Database Design",
    "System Architecture",
]


# ---------------------------------------------------------------------------
# Export template configuration
# ---------------------------------------------------------------------------
# Each export format references a template file stored under ``templates/``.
# The metadata describes how those templates should be used by the export
# helpers in ``utils.py`` or ``app.py``.
# Maps an export format identifier to its template path (relative, under
# ``templates/``), the content type of the rendered output, and a short
# human-readable description.
EXPORT_TEMPLATES: Dict[str, Dict[str, str]] = {
    "html": {
        "path": "templates/export_html.html",
        "content_type": "text/html",
        "description": "Rich HTML report suitable for sharing with stakeholders.",
    },
    "markdown": {
        "path": "templates/export_markdown.md",
        "content_type": "text/markdown",
        "description": "Lightweight Markdown export for version control or wikis.",
    },
}


# ---------------------------------------------------------------------------
# Application configuration dataclasses
# ---------------------------------------------------------------------------


@dataclass
class ProviderCredential:
    """Runtime view of provider configuration resolved from the environment."""

    # Provider identifier, e.g. ``"openai"``; used as the ``AI_PROVIDERS`` key.
    provider: str
    # Name of the environment variable the API key is read from.
    env_var: str
    # The secret itself, or ``None`` when not configured. Kept out of ``repr``
    # so that logging or printing a config object never leaks the key.
    api_key: Optional[str] = field(default=None, repr=False)

    @property
    def display_name(self) -> str:
        """Return the human-friendly name defined in ``AI_PROVIDERS``.

        Falls back to the title-cased provider identifier when the provider
        has no entry, or its entry has no ``display_name``.
        """
        provider_meta = AI_PROVIDERS.get(self.provider, {})
        return provider_meta.get("display_name", self.provider.title())


@dataclass
class AppConfig:
    """Container holding runtime configuration for the Gradio interface."""

    # Resolved database location; ``None`` when persistence is disabled.
    database_path: Optional[Path]
    # One credential per known provider, keyed by provider identifier.
    providers: Dict[str, ProviderCredential] = field(default_factory=dict)
    # Provider selected when the user has not chosen one.
    default_provider: str = "openai"
    # True when no provider has an API key configured.
    demo_mode: bool = False
    # Value of ``SPACE_ID`` / ``HF_SPACE_ID`` when present.
    space_id: Optional[str] = None
    # Whether on-disk storage is in use.
    persistence_enabled: bool = True

    @classmethod
    def from_environment(cls) -> "AppConfig":
        """Build an :class:`AppConfig` instance using environment variables.

        Environment variables consulted:

        * ``NAEXYA_DISABLE_STORAGE`` / ``NAEXYA_ENABLE_STORAGE`` - truthy
          values (``1``, ``true``, ``yes``, ``on``) force persistence off or
          on; disable wins when both are set.
        * ``NAEXYA_DATA_DIR`` / ``NAEXYA_DB_FILENAME`` - database location.
        * ``NAEXYA_DEFAULT_PROVIDER`` - preferred provider identifier.
        * ``<PROVIDER>_API_KEY`` - one per supported provider.
        * ``SPACE_ID`` / ``HF_SPACE_ID`` / ``HF_HOME`` - Spaces detection.

        Returns:
            A fully populated configuration object.

        Raises:
            ValueError: Propagated from ``validate_configuration`` when the
                static configuration tables are malformed.
            OSError: If the data directory cannot be created.
        """
        load_dotenv()
        validate_configuration()

        env = os.environ

        # NOTE(review): ``HF_HOME`` may also be set on developer machines that
        # relocate their Hugging Face cache, which would be detected as Spaces
        # here - confirm whether it should remain a signal.
        is_spaces = any(env.get(var) for var in ("SPACE_ID", "HF_SPACE_ID", "HF_HOME"))

        def _is_truthy(value: Optional[str]) -> bool:
            # An unset variable (``None``) is explicitly falsy.
            if value is None:
                return False
            return value.strip().lower() in {"1", "true", "yes", "on"}

        if _is_truthy(env.get("NAEXYA_DISABLE_STORAGE")):
            persistence_enabled = False
        elif _is_truthy(env.get("NAEXYA_ENABLE_STORAGE")):
            persistence_enabled = True
        else:
            # Hugging Face Spaces mount a read-only filesystem for the
            # repository. Default to in-memory storage there unless
            # explicitly overridden.
            persistence_enabled = not is_spaces

        database_path: Optional[Path] = None
        if persistence_enabled:
            data_dir = Path(
                env.get("NAEXYA_DATA_DIR")
                or ("/data" if is_spaces else Path(__file__).resolve().parent)
            )
            data_dir.mkdir(parents=True, exist_ok=True)
            # ``or`` rather than a ``get`` default: an empty variable would
            # otherwise make the path resolve to the directory itself.
            db_filename = env.get("NAEXYA_DB_FILENAME") or "naexya_docs_ai.db"
            database_path = (data_dir / db_filename).resolve()

        provider_env_map = {
            "openai": "OPENAI_API_KEY",
            "anthropic": "ANTHROPIC_API_KEY",
            "google": "GOOGLE_API_KEY",
            "xai": "XAI_API_KEY",
            "moonshot": "MOONSHOT_API_KEY",
            "qwen": "QWEN_API_KEY",
        }
        providers = {
            name: ProviderCredential(
                provider=name,
                env_var=env_var,
                # Strip stray whitespace/newlines; a blank value means "unset".
                api_key=(env.get(env_var) or "").strip() or None,
            )
            for name, env_var in provider_env_map.items()
        }

        # Choose a sensible default provider, preferring explicit environment
        # configuration, then the first provider that has a key, then OpenAI.
        configured = [key for key, cred in providers.items() if cred.api_key]
        requested_default = (env.get("NAEXYA_DEFAULT_PROVIDER") or "openai").strip().lower()
        if requested_default not in providers:
            requested_default = "openai"

        if requested_default in configured:
            default_provider = requested_default
        elif configured:
            default_provider = configured[0]
        else:
            default_provider = "openai"

        return cls(
            database_path=database_path,
            providers=providers,
            default_provider=default_provider,
            demo_mode=not configured,
            space_id=env.get("SPACE_ID") or env.get("HF_SPACE_ID"),
            persistence_enabled=persistence_enabled,
        )

    def get_api_key(self, provider: str) -> Optional[str]:
        """Retrieve the configured API key for ``provider`` if available.

        The lookup is case-insensitive. Returns ``None`` when the provider is
        unknown or has no key.
        """
        credential = self.providers.get(provider.lower())
        return credential.api_key if credential else None

    def configured_providers(self) -> Dict[str, ProviderCredential]:
        """Return only the providers that currently have API keys configured."""
        return {name: cred for name, cred in self.providers.items() if cred.api_key}


# ---------------------------------------------------------------------------
# Validation utilities
# ---------------------------------------------------------------------------
# The functions below provide quick sanity checks that configuration dictionaries
# contain the expected fields. They raise ``ValueError`` with descriptive
# messages so callers can fail fast during application start-up.


def validate_provider_config(provider_key: str) -> None:
    """Validate a single provider configuration entry.

    Args:
        provider_key: The dictionary key identifying the provider
            (e.g. ``"openai"``).

    Raises:
        ValueError: If the provider is unknown, required fields are missing,
            ``headers`` is not a dictionary, a credential header lacks the
            ``{api_key}`` placeholder, or ``default_model`` is not listed in
            ``available_models`` (when that list is present).
    """
    config = AI_PROVIDERS.get(provider_key)
    if config is None:
        raise ValueError(f"Provider '{provider_key}' is not defined in AI_PROVIDERS.")

    required_fields = [
        "display_name",
        "base_url",
        "chat_endpoint",
        "default_model",
        "headers",
        "default_params",
        "rate_limits",
    ]
    # ``name`` rather than ``field`` to avoid shadowing ``dataclasses.field``.
    missing = [name for name in required_fields if name not in config]
    if missing:
        raise ValueError(
            f"Provider '{provider_key}' is missing required fields: {', '.join(missing)}"
        )

    headers = config["headers"]
    if not isinstance(headers, dict):
        # Guard first: ``in`` on a string would silently do a substring test.
        raise ValueError(f"Provider '{provider_key}' headers must be a dictionary.")

    # Headers that carry the credential must keep the substitution placeholder.
    # Providers that authenticate some other way simply omit these headers.
    for header_name in ("Authorization", "x-api-key"):
        if header_name not in headers:
            continue
        header_value = headers[header_name]
        if not isinstance(header_value, str) or "{api_key}" not in header_value:
            raise ValueError(
                f"Provider '{provider_key}' {header_name} header must include '{{api_key}}' placeholder."
            )

    # ``available_models`` is optional, but when given it must offer the default.
    available_models = config.get("available_models")
    if available_models is not None and config["default_model"] not in available_models:
        raise ValueError(
            f"Provider '{provider_key}' default_model '{config['default_model']}' "
            "is not listed in available_models."
        )


def validate_all_providers() -> None:
    """Validate every provider configuration entry.

    Raises:
        ValueError: From the first entry that fails validation.
    """
    for provider_key in AI_PROVIDERS:
        validate_provider_config(provider_key)


def validate_personas() -> None:
    """Ensure persona definitions include prompts for consistent behaviour.

    Raises:
        ValueError: If a persona has no ``prompt``, or the prompt is not a
            string or is blank.
    """
    for key, persona in AI_PERSONAS.items():
        prompt = persona.get("prompt")
        # A non-string prompt is reported the same way as a missing one
        # instead of surfacing as an ``AttributeError`` from ``.strip()``.
        if not isinstance(prompt, str) or not prompt.strip():
            raise ValueError(f"Persona '{key}' must include a non-empty prompt.")


def validate_specification_types() -> None:
    """Verify specification types are unique and non-empty.

    Uniqueness is checked after stripping surrounding whitespace.

    Raises:
        ValueError: If the list is empty, contains a blank or non-string
            entry, or contains duplicates.
    """
    if not SPECIFICATION_TYPES:
        raise ValueError("SPECIFICATION_TYPES must contain at least one entry.")

    if any(not isinstance(spec, str) or not spec.strip() for spec in SPECIFICATION_TYPES):
        raise ValueError("SPECIFICATION_TYPES must not contain blank values.")

    normalized = [spec.strip() for spec in SPECIFICATION_TYPES]
    if len(set(normalized)) != len(normalized):
        raise ValueError("SPECIFICATION_TYPES entries must be unique.")


def validate_export_templates() -> None:
    """Confirm export template metadata includes expected fields.

    Raises:
        ValueError: If a template lacks ``path``, ``content_type``, or
            ``description``.
    """
    required_fields = {"path", "content_type", "description"}
    for key, template in EXPORT_TEMPLATES.items():
        missing = required_fields - template.keys()
        if missing:
            raise ValueError(
                f"Export template '{key}' is missing fields: {', '.join(sorted(missing))}"
            )


def validate_configuration() -> None:
    """Run all configuration validators.

    This helper is convenient during application start-up to ensure
    configuration issues are detected early rather than failing deep inside
    the request cycle.

    Raises:
        ValueError: From the first validator that fails.
    """
    validate_all_providers()
    validate_personas()
    validate_specification_types()
    validate_export_templates()


__all__ = [
    "AI_PROVIDERS",
    "AI_PERSONAS",
    "SPECIFICATION_TYPES",
    "EXPORT_TEMPLATES",
    "ProviderCredential",
    "AppConfig",
    "validate_configuration",
]