#!/usr/bin/env python3
# NOTICE: This file is adapted from Tencent's CognitiveKernel-Pro (https://github.com/Tencent/CognitiveKernel-Pro).
# Modifications in this fork (2025) are for academic research and educational use only; no commercial use.
# Original rights belong to the original authors and Tencent; see upstream license for details.

"""
CognitiveKernel-Pro TOML Configuration System

Centralized, typed configuration management replacing JSON/dict passing.
Follows Linus Torvalds philosophy: simple, direct, no defensive backups.
"""

import os
import logging as std_logging
from dataclasses import dataclass, field
from typing import Dict, Any, Optional
from pathlib import Path


@dataclass
class LLMConfig:
    """Language Model configuration - HTTP-only, fail-fast"""
    call_target: str  # Must be HTTP URL
    api_key: str      # Required
    model: str        # Required
    api_base_url: Optional[str] = None  # Backward compatibility
    request_timeout: int = 600
    max_retry_times: int = 5
    max_token_num: int = 20000
    extract_body: Dict[str, Any] = field(default_factory=dict)
    # Backward compatibility attributes (ignored)
    thinking: bool = False
    seed: int = 1377
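

# Direct-construction sketch of LLMConfig (illustrative values only; in
# normal use instances are built by Settings._build_llm_config below):
#
#   cfg = LLMConfig(
#       call_target="https://api.openai.com/v1/chat/completions",
#       api_key="sk-example",
#       model="gpt-4o-mini",
#       extract_body={"temperature": 0.0, "max_tokens": 8192},
#   )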


@dataclass
class WebEnvConfig:
    """Web Environment configuration (HTTP API)"""
    web_ip: str = "localhost:3000"
    web_command: str = ""
    web_timeout: int = 600
    screenshot_boxed: bool = True
    target_url: str = "https://www.bing.com/"


@dataclass
class WebEnvBuiltinConfig:
    """Playwright builtin Web Environment configuration"""
    max_browsers: int = 16
    headless: bool = True
    web_timeout: int = 600
    screenshot_boxed: bool = True
    target_url: str = "https://www.bing.com/"


@dataclass
class WebAgentConfig:
    """Web Agent configuration"""
    max_steps: int = 20
    use_multimodal: str = "auto"  # off|yes|auto
    model: LLMConfig = field(default_factory=lambda: LLMConfig(
        call_target=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions"),
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        model=os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini"),
        extract_body={"temperature": 0.0, "max_tokens": 8192}
    ))
    model_multimodal: LLMConfig = field(default_factory=lambda: LLMConfig(
        call_target=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions"),
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        model=os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini"),
        extract_body={"temperature": 0.0, "max_tokens": 8192}
    ))
    env: WebEnvConfig = field(default_factory=WebEnvConfig)
    env_builtin: WebEnvBuiltinConfig = field(default_factory=WebEnvBuiltinConfig)


@dataclass
class FileAgentConfig:
    """File Agent configuration"""
    max_steps: int = 16
    max_file_read_tokens: int = 3000
    max_file_screenshots: int = 2
    model: LLMConfig = field(default_factory=lambda: LLMConfig(
        call_target=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions"),
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        model=os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini"),
        extract_body={"temperature": 0.3, "max_tokens": 8192}
    ))
    model_multimodal: LLMConfig = field(default_factory=lambda: LLMConfig(
        call_target=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions"),
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        model=os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini"),
        extract_body={"temperature": 0.0, "max_tokens": 8192}
    ))


@dataclass
class CKAgentConfig:
    """Core CKAgent configuration"""
    name: str = "ck_agent"
    description: str = "Cognitive Kernel, an initial autopilot system."
    max_steps: int = 16
    max_time_limit: int = 4200
    recent_steps: int = 5
    obs_max_token: int = 8192
    exec_timeout_with_call: int = 1000
    exec_timeout_wo_call: int = 200
    end_template: str = "more"  # less|medium|more; controls ck_end verbosity
    model: LLMConfig = field(default_factory=lambda: LLMConfig(
        call_target=os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions"),
        api_key=os.environ.get("OPENAI_API_KEY", "your-api-key-here"),
        model=os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini"),
        extract_body={"temperature": 0.6, "max_tokens": 4000}
    ))


@dataclass
class LoggingConfig:
    """Centralized logging configuration"""
    console_level: str = "INFO"
    log_dir: str = "logs"
    session_logs: bool = True


@dataclass
class SearchConfig:
    """Search backend configuration"""
    backend: str = "google"  # google|duckduckgo




@dataclass
class EnvironmentConfig:
    """System environment configuration"""


@dataclass
class Settings:
    """Root configuration object"""
    ck: CKAgentConfig = field(default_factory=CKAgentConfig)
    web: WebAgentConfig = field(default_factory=WebAgentConfig)
    file: FileAgentConfig = field(default_factory=FileAgentConfig)
    logging: LoggingConfig = field(default_factory=LoggingConfig)
    search: SearchConfig = field(default_factory=SearchConfig)
    environment: EnvironmentConfig = field(default_factory=EnvironmentConfig)

    @classmethod
    def load(cls, path: str = "config.toml") -> "Settings":
        """Load configuration from TOML file or build from environment.

        If the TOML file does not exist and OPENAI_* environment variables are
        provided, build settings that source credentials from environment vars.
        Falls back to hardcoded defaults otherwise.
        """
        try:
            import tomllib
        except ImportError:
            # Python < 3.11 fallback
            try:
                import tomli as tomllib
            except ImportError:
                raise ImportError(
                    "TOML support requires Python 3.11+ or 'pip install tomli'"
                )

        config_path = Path(path)

        if not config_path.exists():
            # Environment-only path: create minimal sections so env fallback triggers
            env_vars = {
                "OPENAI_API_BASE": os.environ.get("OPENAI_API_BASE"),
                "OPENAI_API_KEY": os.environ.get("OPENAI_API_KEY"),
                "OPENAI_API_MODEL": os.environ.get("OPENAI_API_MODEL")
            }

            env_present = bool(env_vars["OPENAI_API_BASE"] or env_vars["OPENAI_API_KEY"] or env_vars["OPENAI_API_MODEL"])

            if env_present:
                data: Dict[str, Any] = {
                    "ck": {"model": {}},
                    "web": {"model": {}, "model_multimodal": {}},
                    "file": {"model": {}, "model_multimodal": {}},
                }
                return cls._from_dict(data)
            else:
                return cls()

        with open(config_path, "rb") as f:
            data = tomllib.load(f)

        return cls._from_dict(data)

    @classmethod
    def _from_dict(cls, data: Dict[str, Any]) -> "Settings":
        """Convert TOML dict to Settings object"""
        # Extract sections with defaults
        ck_data = data.get("ck", {})
        web_data = data.get("web", {})
        file_data = data.get("file", {})
        logging_data = data.get("logging", {})
        search_data = data.get("search", {})
        environment_data = data.get("environment", {})

        # Build nested configs
        ck_config = CKAgentConfig(
            name=ck_data.get("name", "ck_agent"),
            description=ck_data.get("description", "Cognitive Kernel, an initial autopilot system."),
            max_steps=ck_data.get("max_steps", 16),
            max_time_limit=ck_data.get("max_time_limit", 4200),
            recent_steps=ck_data.get("recent_steps", 5),
            obs_max_token=ck_data.get("obs_max_token", 8192),
            exec_timeout_with_call=ck_data.get("exec_timeout_with_call", 1000),
            exec_timeout_wo_call=ck_data.get("exec_timeout_wo_call", 200),
            end_template=ck_data.get("end_template", "more"),
            # Always build model (even if empty dict) so env fallback can apply
            model=cls._build_llm_config(ck_data.get("model", {}), {
                "temperature": 0.6, "max_tokens": 4000
            })
        )

        web_config = WebAgentConfig(
            max_steps=web_data.get("max_steps", 20),
            use_multimodal=web_data.get("use_multimodal", "auto"),
            model=cls._build_llm_config(web_data.get("model", {}), {
                "temperature": 0.0, "max_tokens": 8192
            }),
            model_multimodal=cls._build_llm_config(web_data.get("model_multimodal", {}), {
                "temperature": 0.0, "max_tokens": 8192
            }),
            env=cls._build_web_env_config(web_data.get("env", {})),
            env_builtin=cls._build_web_env_builtin_config(web_data.get("env_builtin", {}))
        )

        file_config = FileAgentConfig(
            max_steps=file_data.get("max_steps", 16),
            max_file_read_tokens=file_data.get("max_file_read_tokens", 3000),
            max_file_screenshots=file_data.get("max_file_screenshots", 2),
            model=cls._build_llm_config(file_data.get("model", {}), {
                "temperature": 0.3, "max_tokens": 8192
            }),
            model_multimodal=cls._build_llm_config(file_data.get("model_multimodal", {}), {
                "temperature": 0.0, "max_tokens": 8192
            })
        )

        logging_config = LoggingConfig(
            console_level=logging_data.get("console_level", "INFO"),
            log_dir=logging_data.get("log_dir", "logs"),
            session_logs=logging_data.get("session_logs", True)
        )

        search_config = SearchConfig(
            backend=search_data.get("backend", "google")
        )

        environment_config = EnvironmentConfig()

        return cls(
            ck=ck_config,
            web=web_config,
            file=file_config,
            logging=logging_config,
            search=search_config,
            environment=environment_config
        )

    @staticmethod
    def _build_llm_config(llm_data: Dict[str, Any], default_extract_body: Dict[str, Any]) -> LLMConfig:
        """Build LLMConfig from TOML data - HTTP-only, fail-fast

        Priority order: TOML config > Inheritance > Environment variables > Hardcoded defaults

        Environment variable support:
        - OPENAI_API_BASE: Default API base URL
        - OPENAI_API_KEY: Default API key
        - OPENAI_API_MODEL: Default model name

        Environment variables are only used when the corresponding config value is not provided.
        """
        # Merge default extract_body with config
        extract_body = default_extract_body.copy()
        extract_body.update(llm_data.get("extract_body", {}))
        # Also support legacy call_kwargs section for backward compatibility
        extract_body.update(llm_data.get("call_kwargs", {}))

        # HTTP-only validation and environment variable fallback
        call_target = llm_data.get("call_target")
        if call_target is None:
            call_target = os.environ.get("OPENAI_API_BASE", "https://api.openai.com/v1/chat/completions")

        # Validate HTTP(S) URL regardless of source (config or env var);
        # a bare "http" prefix check would also accept schemes like "httpx://"
        if not call_target.startswith(("http://", "https://")):
            raise ValueError(f"call_target must be an HTTP(S) URL, got: {call_target}")

        api_key = llm_data.get("api_key")
        if not api_key:
            api_key = os.environ.get("OPENAI_API_KEY", "your-api-key-here")

        model = llm_data.get("model")
        if not model:
            model = os.environ.get("OPENAI_API_MODEL", "gpt-4o-mini")

        # api_base_url comes only from the TOML section; it is deliberately
        # never derived from call_target, so parent→child inheritance in
        # to_ckagent_kwargs() stays predictable
        api_base_url = llm_data.get("api_base_url")

        config = LLMConfig(
            call_target=call_target,
            api_key=api_key,
            model=model,
            api_base_url=api_base_url,
            request_timeout=llm_data.get("request_timeout", 600),
            max_retry_times=llm_data.get("max_retry_times", 5),
            max_token_num=llm_data.get("max_token_num", 20000),
            extract_body=extract_body,
            thinking=llm_data.get("thinking", False),
            seed=llm_data.get("seed", 1377),
        )

        return config

    @staticmethod
    def _build_web_env_config(env_data: Dict[str, Any]) -> WebEnvConfig:
        """Build WebEnvConfig from TOML data"""
        return WebEnvConfig(
            web_ip=env_data.get("web_ip", "localhost:3000"),
            web_command=env_data.get("web_command", ""),
            web_timeout=env_data.get("web_timeout", 600),
            screenshot_boxed=env_data.get("screenshot_boxed", True),
            target_url=env_data.get("target_url", "https://www.bing.com/")
        )

    @staticmethod
    def _build_web_env_builtin_config(env_data: Dict[str, Any]) -> WebEnvBuiltinConfig:
        """Build WebEnvBuiltinConfig from TOML data"""
        return WebEnvBuiltinConfig(
            max_browsers=env_data.get("max_browsers", 16),
            headless=env_data.get("headless", True),
            web_timeout=env_data.get("web_timeout", 600),
            screenshot_boxed=env_data.get("screenshot_boxed", True),
            target_url=env_data.get("target_url", "https://www.bing.com/")
        )

    def validate(self) -> None:
        """Validate configuration values"""
        # Validate use_multimodal enum
        if self.web.use_multimodal not in {"off", "yes", "auto"}:
            raise ValueError(f"web.use_multimodal must be 'off', 'yes', or 'auto', got: {self.web.use_multimodal}")

        # Validate search backend
        if self.search.backend not in {"google", "duckduckgo"}:
            raise ValueError(f"search.backend must be 'google' or 'duckduckgo', got: {self.search.backend}")

        # Validate logging level
        valid_levels = {"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}
        if self.logging.console_level not in valid_levels:
            raise ValueError(f"logging.console_level must be one of {valid_levels}, got: {self.logging.console_level}")

    def to_ckagent_kwargs(self) -> Dict[str, Any]:
        """Convert Settings to CKAgent constructor kwargs"""
        # Parent→child inheritance for API creds
        parent_model = self._llm_config_to_dict(self.ck.model)
        web_model = self._llm_config_to_dict(self.web.model)
        file_model = self._llm_config_to_dict(self.file.model)
        web_mm_model = self._llm_config_to_dict(self.web.model_multimodal)
        file_mm_model = self._llm_config_to_dict(self.file.model_multimodal)

        def inherit(child: Dict[str, Any], parent: Dict[str, Any]) -> Dict[str, Any]:
            # Inherit fields that are missing or empty in child
            if ("api_base_url" not in child or not child.get("api_base_url")) and "api_base_url" in parent:
                child["api_base_url"] = parent["api_base_url"]
            if ("api_key" not in child or not child.get("api_key")) and "api_key" in parent:
                child["api_key"] = parent["api_key"]
            if ("model" not in child or not child.get("model")) and "model" in parent:
                child["model"] = parent["model"]
            return child

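        # Example (a sketch): when api_base_url is set only on [ck.model],
        # the child dicts above carry api_base_url=None, so inherit() copies
        # the parent's value into the web/file models; api_key and model are
        # usually already filled by _build_llm_config's env-var fallbacks.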
        web_model = inherit(web_model, parent_model)
        file_model = inherit(file_model, parent_model)
        web_mm_model = inherit(web_mm_model, parent_model)
        file_mm_model = inherit(file_mm_model, parent_model)

        # Legacy tests expect a reduced model dict with call_kwargs etc.
        def reduce_model(m: Dict[str, Any]) -> Dict[str, Any]:
            out = {
                "call_target": m.get("call_target"),
                "thinking": m.get("thinking", False),
                "request_timeout": m.get("request_timeout", 600),
                "max_retry_times": m.get("max_retry_times", 5),
                "seed": m.get("seed", 1377),
                "max_token_num": m.get("max_token_num", 20000),
                "call_kwargs": m.get("extract_body", {}),
            }
            # Preserve API credentials for integration tests that assert existence
            if m.get("api_key") is not None:
                out["api_key"] = m["api_key"]
            if m.get("api_base_url") is not None:
                out["api_base_url"] = m["api_base_url"]
            if m.get("model") is not None:
                out["model"] = m["model"]
            return out

        return {
            "name": self.ck.name,
            "description": self.ck.description,
            "max_steps": self.ck.max_steps,
            "max_time_limit": self.ck.max_time_limit,
            "recent_steps": self.ck.recent_steps,
            "obs_max_token": self.ck.obs_max_token,
            "exec_timeout_with_call": self.ck.exec_timeout_with_call,
            "exec_timeout_wo_call": self.ck.exec_timeout_wo_call,
            "end_template": self.ck.end_template,
            "model": reduce_model(parent_model),
            "web_agent": {
                "max_steps": self.web.max_steps,
                "use_multimodal": self.web.use_multimodal,
                "model": reduce_model(web_model),
                "model_multimodal": reduce_model(web_mm_model),
                "web_env_kwargs": {
                    "web_ip": self.web.env.web_ip,
                    "web_command": self.web.env.web_command,
                    "web_timeout": self.web.env.web_timeout,
                    "screenshot_boxed": self.web.env.screenshot_boxed,
                    "target_url": self.web.env.target_url,
                    # Builtin env config for fuse fallback
                    "max_browsers": self.web.env_builtin.max_browsers,
                    "headless": self.web.env_builtin.headless,
                }
            },
            "file_agent": {
                "max_steps": self.file.max_steps,
                "max_file_read_tokens": self.file.max_file_read_tokens,
                "max_file_screenshots": self.file.max_file_screenshots,
                "model": reduce_model(file_model),
                "model_multimodal": reduce_model(file_mm_model),
            },
            "search_backend": self.search.backend,  # Add search backend configuration
        }

    def _llm_config_to_dict(self, llm_config: LLMConfig) -> Dict[str, Any]:
        """Convert LLMConfig to dict for agent initialization - HTTP-only"""
        return {
            "call_target": llm_config.call_target,
            "api_key": llm_config.api_key,
            "model": llm_config.model,
            # Included so inherit() in to_ckagent_kwargs() can actually
            # propagate a parent-level api_base_url to child models
            "api_base_url": llm_config.api_base_url,
            "extract_body": llm_config.extract_body.copy(),
            "request_timeout": llm_config.request_timeout,
            "max_retry_times": llm_config.max_retry_times,
            "max_token_num": llm_config.max_token_num,
            # Backward compatibility (ignored by LLM)
            "thinking": llm_config.thinking,
            "seed": llm_config.seed,
        }

    def build_logger(self) -> std_logging.Logger:
        """Create configured logger instance"""
        # Create logs directory (including any missing parent directories)
        log_dir = Path(self.logging.log_dir)
        log_dir.mkdir(parents=True, exist_ok=True)

        # Create logger; don't propagate to the root logger, so records are
        # not emitted twice when the root logger is also configured
        logger = std_logging.getLogger("CognitiveKernel")
        logger.setLevel(getattr(std_logging, self.logging.console_level))
        logger.propagate = False

        # Clear existing handlers
        logger.handlers.clear()

        # Console handler
        console_handler = std_logging.StreamHandler()
        console_handler.setLevel(getattr(std_logging, self.logging.console_level))
        console_formatter = std_logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
        )
        console_handler.setFormatter(console_formatter)
        logger.addHandler(console_handler)

        # File handler if session_logs enabled
        if self.logging.session_logs:
            from datetime import datetime
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            log_file = log_dir / f"ck_session_{timestamp}.log"
            file_handler = std_logging.FileHandler(log_file, encoding="utf-8")
            file_handler.setLevel(getattr(std_logging, self.logging.console_level))
            file_handler.setFormatter(console_formatter)
            logger.addHandler(file_handler)

        return logger
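

if __name__ == "__main__":
    # Minimal smoke-test sketch (local convenience, not part of the upstream
    # API): load settings from config.toml / environment / defaults, validate
    # them, and print the kwargs that would be handed to CKAgent. Note that
    # build_logger() creates the log directory and a session log file.
    import json

    settings = Settings.load()
    settings.validate()
    logger = settings.build_logger()
    logger.info("Loaded settings for agent %r", settings.ck.name)
    print(json.dumps(settings.to_ckagent_kwargs(), indent=2))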