|
|
|
|
|
""" |
|
|
HASHIRU 6.1 - OPTIMIZED CONFIGURATION 2025 |
|
|
Hardware-Specific AI Model Configuration based on 2025 Research |
|
|
|
|
|
Optimizations: |
|
|
- RTX 3060 (12GB) + RTX 2060 (6GB) specific model selection |
|
|
- Performance vs Quality balanced configurations |
|
|
- Type-safe dataclasses with runtime validation |
|
|
- Environment variable override support |
|
|
- Immutable configuration with validation |
|
|
- Memory-efficient model allocation |
|
|
|
|
|
Research Sources: |
|
|
- GPU memory optimization studies (NVIDIA 2025) |
|
|
- Dual GPU LLM inference benchmarks |
|
|
- Python dataclass best practices |
|
|
- Production-ready configuration management |
|
|
""" |
|
|
|
|
|
import os |
|
|
import logging |
|
|
from dataclasses import dataclass, field |
|
|
from pathlib import Path |
|
|
from typing import List, Dict, Optional, Union, Any, Tuple |
|
|
from enum import Enum |
|
|
|
|
|
|
|
|
logger = logging.getLogger("hashiru.config") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class ModelSize(Enum):
    """Model size categories optimized for dual GPU setup.

    Values are the parameter-count labels used in Ollama model tags.
    """
    SMALL_3B = "3B"      # small; used as the ultra-fast fallback tier
    MEDIUM_7B = "7B"
    LARGE_8B = "8B"
    XLARGE_14B = "14B"
    PREMIUM_33B = "33B"  # largest tier; registry marks it as slow on this hardware
|
|
|
|
|
class Precision(Enum):
    """Precision formats for memory optimization.

    Lower-precision formats reduce VRAM usage at some quality cost.
    """
    FP16 = "fp16"  # half precision; the recommended default in this registry
    INT8 = "int8"  # 8-bit integer quantization
    INT4 = "int4"  # 4-bit quantization; used for oversized models (e.g. 33B)
    FP8 = "fp8"    # 8-bit floating point
|
|
|
|
|
class ModelType(Enum):
    """Model usage types for specialized selection.

    Each member maps to a preferred model and a fallback chain in
    OptimizedModelRegistry.
    """
    REASONING = "reasoning"        # complex multi-step reasoning tasks
    CODE = "code"                  # code generation / completion
    CONVERSATION = "conversation"  # general chat
    TOOLS = "tools"                # tool/function calling and automation
    RESEARCH = "research"          # research and analysis
    GENERAL = "general"            # catch-all; routed to the conversation model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class ModelConfig:
    """Single model configuration with hardware specifications."""
    name: str                          # Ollama model tag, e.g. "qwen2.5:14b-instruct"
    size: ModelSize                    # parameter-count category
    vram_requirement_gb: float         # estimated VRAM needed to load the model
    recommended_precision: Precision   # precision to run the model at
    performance_tier: int              # 1 = best for this hardware; higher = lower priority
    tokens_per_second_estimate: int    # rough throughput estimate on the target GPUs
    description: str                   # human-readable summary

    def is_compatible_with_vram(self, available_vram_gb: float) -> bool:
        """Check if model fits in available VRAM."""
        return self.vram_requirement_gb <= available_vram_gb
|
|
|
|
|
@dataclass(frozen=True)
class HardwareProfile:
    """Immutable description of the GPU setup used for model selection."""
    gpu_primary: str                 # name of the main GPU
    gpu_secondary: Optional[str]     # secondary GPU name, or None for single-GPU rigs
    vram_primary_gb: float           # VRAM on the primary card
    vram_secondary_gb: float         # VRAM on the secondary card (0 if absent)
    total_vram_gb: float             # combined VRAM across both cards

    @property
    def is_dual_gpu(self) -> bool:
        """True when a secondary GPU is configured."""
        has_secondary = self.gpu_secondary is not None
        return has_secondary
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class OptimizedModelRegistry:
    """
    Hardware-optimized model registry based on 2025 performance research.
    Specifically optimized for RTX 3060 (12GB) + RTX 2060 (6GB) setup.
    """

    # Reference profile for the target dual-GPU machine.
    DUAL_RTX_PROFILE = HardwareProfile(
        gpu_primary="RTX 3060",
        gpu_secondary="RTX 2060",
        vram_primary_gb=12.0,
        vram_secondary_gb=6.0,
        total_vram_gb=18.0
    )

    # Registry key -> full model specification.
    MODELS = {
        "reasoning_premium": ModelConfig(
            name="qwen2.5:14b-instruct",
            size=ModelSize.XLARGE_14B,
            vram_requirement_gb=9.0,
            recommended_precision=Precision.FP16,
            performance_tier=1,
            tokens_per_second_estimate=25,
            description="Premium reasoning model optimized for RTX 3060"
        ),
        "reasoning_balanced": ModelConfig(
            name="deepseek-r1:8b",
            size=ModelSize.LARGE_8B,
            vram_requirement_gb=5.2,
            recommended_precision=Precision.FP16,
            performance_tier=2,
            tokens_per_second_estimate=35,
            description="Balanced reasoning with good speed"
        ),
        "reasoning_fallback": ModelConfig(
            name="llama3.1:8b",
            size=ModelSize.LARGE_8B,
            vram_requirement_gb=4.9,
            recommended_precision=Precision.FP16,
            performance_tier=3,
            tokens_per_second_estimate=45,
            description="Fast reasoning fallback"
        ),
        "code_optimal": ModelConfig(
            name="deepseek-coder:6.7b",
            size=ModelSize.MEDIUM_7B,
            vram_requirement_gb=3.8,
            recommended_precision=Precision.FP16,
            performance_tier=1,
            tokens_per_second_estimate=50,
            description="Optimal code model for RTX 3060 - best speed/quality"
        ),
        "code_premium": ModelConfig(
            name="deepseek-coder:33b",
            size=ModelSize.PREMIUM_33B,
            vram_requirement_gb=18.0,
            recommended_precision=Precision.INT4,
            performance_tier=5,
            tokens_per_second_estimate=8,
            description="Premium code model (slow on this hardware)"
        ),
        "conversation_fast": ModelConfig(
            name="llama3.1:8b",
            size=ModelSize.LARGE_8B,
            vram_requirement_gb=4.9,
            recommended_precision=Precision.FP16,
            performance_tier=1,
            tokens_per_second_estimate=70,
            description="Fast conversation model - 70+ tokens/s"
        ),
        "conversation_premium": ModelConfig(
            name="qwen2.5:14b-instruct",
            size=ModelSize.XLARGE_14B,
            vram_requirement_gb=9.0,
            recommended_precision=Precision.FP16,
            performance_tier=2,
            tokens_per_second_estimate=25,
            description="Premium conversation quality"
        ),
        "tools_specialized": ModelConfig(
            name="llama3-groq-tool-use:8b",
            size=ModelSize.LARGE_8B,
            vram_requirement_gb=4.7,
            recommended_precision=Precision.FP16,
            performance_tier=1,
            tokens_per_second_estimate=40,
            description="Specialized for automation and tools"
        ),
        "research_balanced": ModelConfig(
            name="qwen2.5:14b-instruct",
            size=ModelSize.XLARGE_14B,
            vram_requirement_gb=9.0,
            recommended_precision=Precision.FP16,
            performance_tier=1,
            tokens_per_second_estimate=25,
            description="Excellent for research and analysis"
        ),
        "lightweight_3b": ModelConfig(
            name="llama3.2:3b",
            size=ModelSize.SMALL_3B,
            vram_requirement_gb=2.5,
            recommended_precision=Precision.FP16,
            performance_tier=4,
            tokens_per_second_estimate=90,
            description="Ultra-fast 3B model for RTX 2060"
        )
    }

    # Preferred registry key per model type.
    OPTIMAL_MAPPING: Dict[ModelType, str] = {
        ModelType.REASONING: "reasoning_premium",
        ModelType.CODE: "code_optimal",
        ModelType.CONVERSATION: "conversation_fast",
        ModelType.TOOLS: "tools_specialized",
        ModelType.RESEARCH: "research_balanced",
        ModelType.GENERAL: "conversation_fast"
    }

    # Ordered fallback chains per model type (best first, smallest last).
    # Shared by get_optimal_model() and get_fallback_models() so the two
    # can never disagree.
    FALLBACK_CHAINS: Dict[ModelType, List[str]] = {
        ModelType.REASONING: [
            "reasoning_premium", "reasoning_balanced",
            "reasoning_fallback", "lightweight_3b"
        ],
        ModelType.CODE: [
            "code_optimal", "reasoning_fallback", "lightweight_3b"
        ],
        ModelType.CONVERSATION: [
            "conversation_fast", "conversation_premium", "lightweight_3b"
        ],
        ModelType.TOOLS: [
            "tools_specialized", "conversation_fast", "lightweight_3b"
        ],
        ModelType.RESEARCH: [
            "research_balanced", "reasoning_fallback", "lightweight_3b"
        ]
    }

    @classmethod
    def get_optimal_model(cls, model_type: ModelType, hardware: HardwareProfile) -> str:
        """Return the Ollama model name best suited to model_type on hardware.

        Picks the preferred model for the type; if it does not fit in the
        primary GPU's VRAM, walks the type's fallback chain and returns the
        first model that fits.

        Bug fix vs. previous version: the old one-shot fallback table could
        return a model that still did not fit (e.g. CODE "fell back" to the
        same code_optimal entry that had just failed the VRAM check).
        """
        model_key = cls.OPTIMAL_MAPPING.get(model_type, "conversation_fast")
        model_config = cls.MODELS[model_key]
        if model_config.is_compatible_with_vram(hardware.vram_primary_gb):
            return model_config.name

        chain = cls.FALLBACK_CHAINS.get(model_type, ["conversation_fast", "lightweight_3b"])
        for key in chain:
            candidate = cls.MODELS[key]
            if candidate.is_compatible_with_vram(hardware.vram_primary_gb):
                return candidate.name

        # Nothing fits: return the smallest model rather than failing outright.
        return cls.MODELS["lightweight_3b"].name

    @classmethod
    def get_fallback_models(cls, model_type: ModelType) -> List[str]:
        """Get ordered list of fallback model names for model_type."""
        fallback_keys = cls.FALLBACK_CHAINS.get(model_type, ["conversation_fast", "lightweight_3b"])
        return [cls.MODELS[key].name for key in fallback_keys]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(frozen=True)
class OllamaConfig:
    """
    Ollama/LLM configuration optimized for the dual-GPU setup.
    Based on 2025 performance research and hardware capabilities.
    """
    base_url: str = "http://127.0.0.1:11434"  # local Ollama HTTP endpoint
    timeout: float = 180.0                    # request timeout in seconds

    # Hardware profile driving all default model choices below.
    hardware: HardwareProfile = field(default_factory=lambda: OptimizedModelRegistry.DUAL_RTX_PROFILE)

    # Primary model per task type, resolved from the registry at construction time.
    reasoning_model: str = field(
        default_factory=lambda: OptimizedModelRegistry.get_optimal_model(
            ModelType.REASONING, OptimizedModelRegistry.DUAL_RTX_PROFILE
        )
    )
    code_model: str = field(
        default_factory=lambda: OptimizedModelRegistry.get_optimal_model(
            ModelType.CODE, OptimizedModelRegistry.DUAL_RTX_PROFILE
        )
    )
    conversation_model: str = field(
        default_factory=lambda: OptimizedModelRegistry.get_optimal_model(
            ModelType.CONVERSATION, OptimizedModelRegistry.DUAL_RTX_PROFILE
        )
    )
    tools_model: str = field(
        default_factory=lambda: OptimizedModelRegistry.get_optimal_model(
            ModelType.TOOLS, OptimizedModelRegistry.DUAL_RTX_PROFILE
        )
    )
    research_model: str = field(
        default_factory=lambda: OptimizedModelRegistry.get_optimal_model(
            ModelType.RESEARCH, OptimizedModelRegistry.DUAL_RTX_PROFILE
        )
    )

    # Ordered fallback chains per task type.
    reasoning_fallbacks: List[str] = field(
        default_factory=lambda: OptimizedModelRegistry.get_fallback_models(ModelType.REASONING)
    )
    code_fallbacks: List[str] = field(
        default_factory=lambda: OptimizedModelRegistry.get_fallback_models(ModelType.CODE)
    )
    conversation_fallbacks: List[str] = field(
        default_factory=lambda: OptimizedModelRegistry.get_fallback_models(ModelType.CONVERSATION)
    )
    tools_fallbacks: List[str] = field(
        default_factory=lambda: OptimizedModelRegistry.get_fallback_models(ModelType.TOOLS)
    )
    research_fallbacks: List[str] = field(
        default_factory=lambda: OptimizedModelRegistry.get_fallback_models(ModelType.RESEARCH)
    )

    def __post_init__(self):
        """Runtime validation of configuration.

        Raises:
            ValueError: on a non-HTTP(S) base URL or a non-positive timeout.
        """
        if not self.base_url.startswith(('http://', 'https://')):
            raise ValueError(f"Invalid Ollama URL: {self.base_url}")

        if self.timeout <= 0:
            raise ValueError(f"Timeout must be positive: {self.timeout}")

        logger.info(f"π― Ollama Config Optimized for {self.hardware.gpu_primary} + {self.hardware.gpu_secondary}")
        logger.info(f"π» Reasoning: {self.reasoning_model}")
        logger.info(f"π§ Code: {self.code_model}")
        logger.info(f"π¬ Conversation: {self.conversation_model}")
        logger.info(f"π οΈ Tools: {self.tools_model}")
        logger.info(f"π¬ Research: {self.research_model}")

    @classmethod
    def from_env(cls) -> 'OllamaConfig':
        """Create config from environment variables with overrides.

        Recognized variables: OLLAMA_BASE_URL, OLLAMA_TIMEOUT, plus
        HASHIRU_{REASONING,CODE,CONVERSATION,TOOLS,RESEARCH}_MODEL.
        Fix vs. previous version: the research model is now overridable via
        HASHIRU_RESEARCH_MODEL, consistent with the other four model types.
        """
        base_url = os.getenv("OLLAMA_BASE_URL", "http://127.0.0.1:11434")
        timeout = float(os.getenv("OLLAMA_TIMEOUT", "180.0"))

        config = cls(base_url=base_url, timeout=timeout)

        # The dataclass is frozen, so env overrides are applied after
        # construction via object.__setattr__ (bypasses the frozen guard).
        # NOTE(review): overrides do not update the *_fallbacks lists, which
        # keep their registry defaults — same as the previous behavior.
        overrides = {
            'reasoning_model': os.getenv("HASHIRU_REASONING_MODEL"),
            'code_model': os.getenv("HASHIRU_CODE_MODEL"),
            'conversation_model': os.getenv("HASHIRU_CONVERSATION_MODEL"),
            'tools_model': os.getenv("HASHIRU_TOOLS_MODEL"),
            'research_model': os.getenv("HASHIRU_RESEARCH_MODEL"),
        }
        for attr, value in overrides.items():
            if value:
                object.__setattr__(config, attr, value)

        return config
|
|
|
|
|
@dataclass(frozen=True)
class SecurityConfig:
    """Enhanced security configuration with type safety."""

    # Workspace where writes are always allowed; created in __post_init__.
    free_project_path: Path = field(default_factory=lambda: Path("C:/hashiru_workspace"))

    # Roots under which writes are permitted (relative entries are resolved
    # against the current working directory).
    allowed_paths: Tuple[str, ...] = (
        ".",
        "tools", "utils", "scripts", "artifacts", "research", "screenshots", "logs",
        "C:/hashiru_workspace",
        "C:/Users/Public",
        "C:/temp", "C:/tmp",
    )

    # Blocked locations: absolute prefixes (contain "/") and bare directory
    # names matched against any path component (".git", "venv", ...).
    blocked_paths: Tuple[str, ...] = (
        "C:/Windows/System32",
        "C:/Program Files",
        "C:/Program Files (x86)",
        ".git", "__pycache__", "venv", ".venv", "node_modules"
    )

    # File extensions that may never be written.
    blocked_extensions: Tuple[str, ...] = (
        ".exe", ".dll", ".sys", ".bat", ".cmd", ".scr", ".pif"
    )

    def __post_init__(self):
        """Validate and setup security configuration."""
        try:
            self.free_project_path.mkdir(parents=True, exist_ok=True)
            logger.info(f"π Security: Free workspace at {self.free_project_path}")
        except Exception as e:
            # Best-effort: a missing workspace is logged, not fatal.
            logger.warning(f"β οΈ Could not create free workspace: {e}")

    @staticmethod
    def _is_under(path_str: str, root_str: str) -> bool:
        """True when path_str equals root_str or lies below it.

        Both arguments must already use forward slashes.  Anchoring the match
        at a directory boundary prevents e.g. "C:/temp2" from matching the
        root "C:/temp".
        """
        root = root_str.rstrip("/")
        return path_str == root or path_str.startswith(root + "/")

    def is_write_allowed(self, target_path: Union[str, Path]) -> bool:
        """Return True when writing to target_path is permitted.

        Check order: blocked extension -> blocked path -> allowed path ->
        user home directory.  Any validation error denies access.

        Fixes vs. previous version:
        - bare names in blocked_paths (".git", "__pycache__", ...) are now
          matched against path components; the old startswith() test against
          an absolute resolved path could never match them;
        - prefix matches are anchored at directory boundaries (see _is_under);
        - the "allowed" debug log previously contained a raw newline inside
          an f-string (encoding corruption), which was a syntax error.
        """
        try:
            resolved = Path(target_path).resolve()
            path_str = str(resolved).replace("\\", "/")
            path_lower = path_str.lower()

            # 1. Dangerous file extensions are always refused.
            if resolved.suffix.lower() in self.blocked_extensions:
                logger.warning(f"π« Blocked extension: {resolved.suffix}")
                return False

            # 2. Blocked locations (case-insensitive, as before).
            components_lower = {part.lower() for part in resolved.parts}
            for blocked in self.blocked_paths:
                blocked_lower = blocked.replace("\\", "/").lower()
                if "/" in blocked_lower:
                    if self._is_under(path_lower, blocked_lower):
                        logger.warning(f"π« Blocked path: {blocked}")
                        return False
                elif blocked_lower in components_lower:
                    logger.warning(f"π« Blocked path: {blocked}")
                    return False

            # 3. Explicitly allowed roots (case-sensitive, as before).
            for allowed in self.allowed_paths:
                allowed_resolved = str(Path(allowed).resolve()).replace("\\", "/")
                if self._is_under(path_str, allowed_resolved):
                    logger.debug(f"β Allowed path: {allowed}")
                    return True

            # 4. The user's home directory is always writable.
            home_path = str(Path.home()).replace("\\", "/")
            if self._is_under(path_str, home_path):
                logger.debug("β User home directory access")
                return True

            logger.warning(f"π« Path not in allowed list: {path_str}")
            return False

        except Exception as e:
            # Fail closed: any resolution/validation error denies the write.
            logger.error(f"β Path validation error: {e}")
            return False
|
|
|
|
|
@dataclass(frozen=True)
class PerformanceConfig:
    """Performance optimization settings based on 2025 research.

    NOTE(review): these flags are only logged here; their consumers are not
    visible in this file — verify downstream usage before changing defaults.
    """

    # Memory / throughput knobs.
    enable_gpu_memory_optimization: bool = True
    max_concurrent_requests: int = 4
    enable_quantization: bool = True
    preferred_precision: Precision = Precision.FP16

    # Attention and caching optimizations.
    enable_flash_attention: bool = True
    enable_kv_cache_optimization: bool = True
    batch_size_optimization: bool = True

    # Multi-GPU parallelism flags (pipeline parallelism disabled by default).
    enable_tensor_parallelism: bool = True
    enable_pipeline_parallelism: bool = False

    def __post_init__(self):
        """Log performance configuration"""
        logger.info(f"β‘ Performance: GPU Memory Optimization: {self.enable_gpu_memory_optimization}")
        logger.info(f"β‘ Performance: Quantization: {self.enable_quantization}")
        logger.info(f"β‘ Performance: Flash Attention: {self.enable_flash_attention}")
        logger.info(f"β‘ Performance: Tensor Parallelism: {self.enable_tensor_parallelism}")
|
|
|
|
|
@dataclass(frozen=True)
class SystemConfig:
    """System-level configuration with validation"""

    # Root of the project tree (directory containing this config module).
    project_root: Path = field(default_factory=lambda: Path(__file__).parent)

    # Working directories, relative to the current working directory.
    tools_dir: Path = field(default_factory=lambda: Path("tools"))
    utils_dir: Path = field(default_factory=lambda: Path("utils"))
    artifacts_dir: Path = field(default_factory=lambda: Path("artifacts"))
    backups_dir: Path = field(default_factory=lambda: Path("backups"))
    research_dir: Path = field(default_factory=lambda: Path("research"))
    screenshots_dir: Path = field(default_factory=lambda: Path("screenshots"))
    logs_dir: Path = field(default_factory=lambda: Path("logs"))

    # Misc runtime settings.
    encoding: str = "utf-8"
    chainlit_port: int = 8080                 # range-validated in Config.__post_init__
    max_commands_per_execution: int = 20
    debug_mode: bool = field(default_factory=lambda: os.getenv("DEBUG", "false").lower() == "true")

    def __post_init__(self):
        """Setup and validate directories"""
        # Only output-style directories are created here; tools_dir and
        # utils_dir are not auto-created.
        directories = [
            self.artifacts_dir, self.backups_dir, self.research_dir,
            self.screenshots_dir, self.logs_dir
        ]

        for directory in directories:
            try:
                directory.mkdir(exist_ok=True)
                logger.debug(f"π Directory ready: {directory}")
            except Exception as e:
                # Best-effort: a missing directory is logged, not fatal.
                logger.warning(f"β οΈ Could not create {directory}: {e}")

        logger.info(f"π§ System: Project root: {self.project_root}")
        logger.info(f"π§ System: Debug mode: {self.debug_mode}")
|
|
|
|
|
@dataclass(frozen=True)
class Config:
    """
    Main configuration class - hardware-optimized for RTX 3060 + RTX 2060.
    Based on 2025 research and best practices.
    """

    # Sub-configurations; each validates/logs itself in its own __post_init__.
    ollama: OllamaConfig = field(default_factory=OllamaConfig)
    security: SecurityConfig = field(default_factory=SecurityConfig)
    performance: PerformanceConfig = field(default_factory=PerformanceConfig)
    system: SystemConfig = field(default_factory=SystemConfig)

    # Behaviour switches.
    autonomous_mode: bool = True
    self_modification_enabled: bool = True

    # User-facing UI strings (Portuguese; part of runtime behavior).
    startup_banner: str = "π HASHIRU 6.1 - Agente AutΓ΄nomo Inteligente"
    processing_message: str = "π§ Processando com IA..."
    executing_message: str = "β‘ Executando automaticamente..."

    # Version metadata.
    version: str = "6.1.2025"
    build_date: str = field(default_factory=lambda: "2025-08-04")

    def __post_init__(self):
        """Post-initialization validation and setup.

        Raises:
            ValueError: if the Chainlit port is outside 1024-65535.
        """
        if not (1024 <= self.system.chainlit_port <= 65535):
            raise ValueError(f"Invalid port: {self.system.chainlit_port}")

        logger.info(f"π HASHIRU {self.version} Configuration Loaded")
        logger.info(f"π Hardware: {self.ollama.hardware.gpu_primary} + {self.ollama.hardware.gpu_secondary}")
        logger.info(f"πΎ Total VRAM: {self.ollama.hardware.total_vram_gb}GB")
        logger.info(f"π§ Autonomous Mode: {self.autonomous_mode}")
        logger.info(f"π Self-Modification: {self.self_modification_enabled}")

        logger.info("π€ Model Assignments:")
        logger.info(f"   π§ Reasoning: {self.ollama.reasoning_model}")
        logger.info(f"   π» Code: {self.ollama.code_model}")
        logger.info(f"   π¬ Conversation: {self.ollama.conversation_model}")
        logger.info(f"   π οΈ Tools: {self.ollama.tools_model}")
        logger.info(f"   π¬ Research: {self.ollama.research_model}")

    @classmethod
    def load(cls) -> 'Config':
        """Load configuration with environment overrides and validation.

        Env variables: HASHIRU_AUTONOMOUS, HASHIRU_SELF_MOD (both default
        "true"), plus everything OllamaConfig.from_env() reads.

        Raises:
            Exception: re-raises any construction/validation failure after
                logging it.
        """
        try:
            ollama_config = OllamaConfig.from_env()

            autonomous_mode = os.getenv("HASHIRU_AUTONOMOUS", "true").lower() == "true"
            self_modification = os.getenv("HASHIRU_SELF_MOD", "true").lower() == "true"

            config = cls(
                ollama=ollama_config,
                autonomous_mode=autonomous_mode,
                self_modification_enabled=self_modification
            )

            # Bug fix: this message previously contained a raw newline inside
            # the string literal (encoding corruption) — a syntax error.
            logger.info("β Configuration loaded successfully")
            return config

        except Exception as e:
            logger.error(f"β Configuration loading failed: {e}")
            raise

    def get_model(self, model_type: str) -> str:
        """Get primary model name for a (case-insensitive) type alias.

        Unknown aliases fall back to the conversation model.
        """
        model_map = {
            "reasoning": self.ollama.reasoning_model,
            "code": self.ollama.code_model,
            "code_specialist": self.ollama.code_model,
            "code_master": self.ollama.code_model,
            "programming": self.ollama.code_model,
            "conversation": self.ollama.conversation_model,
            "chat": self.ollama.conversation_model,
            "tools": self.ollama.tools_model,
            "automation": self.ollama.tools_model,
            "research": self.ollama.research_model,
            "analysis": self.ollama.research_model,
            "general": self.ollama.conversation_model,
            "default": self.ollama.conversation_model
        }

        model = model_map.get(model_type.lower(), self.ollama.conversation_model)
        logger.debug(f"π― Model selection: {model_type} -> {model}")
        return model

    def get_fallback_models(self, model_type: str) -> List[str]:
        """Get fallback model names for a (case-insensitive) type alias.

        Fix vs. previous version: "code_master" is now recognized here,
        consistent with get_model(); unknown aliases fall back to the
        conversation chain.
        """
        fallback_map = {
            "reasoning": self.ollama.reasoning_fallbacks,
            "code": self.ollama.code_fallbacks,
            "code_specialist": self.ollama.code_fallbacks,
            "code_master": self.ollama.code_fallbacks,
            "programming": self.ollama.code_fallbacks,
            "conversation": self.ollama.conversation_fallbacks,
            "chat": self.ollama.conversation_fallbacks,
            "tools": self.ollama.tools_fallbacks,
            "automation": self.ollama.tools_fallbacks,
            "research": self.ollama.research_fallbacks,
            "analysis": self.ollama.research_fallbacks,
            "general": self.ollama.conversation_fallbacks,
        }

        fallbacks = fallback_map.get(model_type.lower(), self.ollama.conversation_fallbacks)
        logger.debug(f"π Fallback chain: {model_type} -> {fallbacks}")
        return fallbacks
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level singleton: load the configuration once at import time.
try:
    config = Config.load()
    logger.info("π HASHIRU Configuration initialized successfully")
except Exception as e:
    logger.error(f"π₯ Configuration initialization failed: {e}")
    # Fall back to pure defaults so the module remains importable.
    config = Config()
    logger.warning("β οΈ Using default configuration as fallback")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_ai_model(model_type: str) -> str:
    """Legacy shim: resolve *model_type* through the global config singleton."""
    selected = config.get_model(model_type)
    logger.debug(f"π Legacy get_ai_model: {model_type} -> {selected}")
    return selected
|
|
|
|
|
def get_fallback_models(model_type: str) -> List[str]:
    """Legacy shim: delegate fallback lookup to the global config singleton."""
    chain = config.get_fallback_models(model_type)
    return chain
|
|
|
|
|
def is_write_path_allowed(target_path: str) -> bool:
    """Legacy shim: delegate write-path validation to the security config."""
    verdict = config.security.is_write_allowed(target_path)
    return verdict
|
|
|
|
|
def is_command_auto_allowed(command: str) -> bool:
    """Legacy shim: commands are auto-allowed only while autonomous mode is on."""
    permitted = config.autonomous_mode
    logger.debug(f"π€ Auto command check: {command} -> {permitted}")
    return permitted
|
|
|
|
|
def is_dangerous_command_allowed(command: str) -> bool:
    """Legacy shim: dangerous commands follow the same autonomous-mode gate."""
    permitted = config.autonomous_mode
    logger.debug(f"β οΈ Dangerous command check: {command} -> {permitted}")
    return permitted
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Legacy module-level aliases, kept for backward compatibility with older
# imports.  These are snapshots taken from the singleton `config` at import
# time; later mutation of `config` is not reflected here.
OLLAMA_URL = config.ollama.base_url
AUTONOMOUS_MODE = config.autonomous_mode
SELF_MODIFICATION_ENABLED = config.self_modification_enabled
STARTUP_BANNER = config.startup_banner
PROCESSING_MESSAGE = config.processing_message
EXECUTING_MESSAGE = config.executing_message

# Hardware summary aliases.
HARDWARE_PROFILE = config.ollama.hardware
PRIMARY_GPU = config.ollama.hardware.gpu_primary
SECONDARY_GPU = config.ollama.hardware.gpu_secondary
TOTAL_VRAM_GB = config.ollama.hardware.total_vram_gb

# Snapshot of the active model assignment per task type.
CURRENT_MODELS = {
    "reasoning": config.ollama.reasoning_model,
    "code": config.ollama.code_model,
    "conversation": config.ollama.conversation_model,
    "tools": config.ollama.tools_model,
    "research": config.ollama.research_model
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Public API of this module.
__all__ = [
    # Main entry points
    "Config", "config",
    # Model registry and typed building blocks
    "OptimizedModelRegistry", "ModelConfig", "HardwareProfile",
    "ModelSize", "Precision", "ModelType",
    # Sub-configurations
    "OllamaConfig", "SecurityConfig", "PerformanceConfig", "SystemConfig",
    # Legacy helper functions
    "get_ai_model", "get_fallback_models",
    "is_write_path_allowed", "is_command_auto_allowed", "is_dangerous_command_allowed",
    # Legacy constants
    "OLLAMA_URL", "AUTONOMOUS_MODE", "SELF_MODIFICATION_ENABLED",
    "STARTUP_BANNER", "PROCESSING_MESSAGE", "EXECUTING_MESSAGE",
    "HARDWARE_PROFILE", "PRIMARY_GPU", "SECONDARY_GPU", "TOTAL_VRAM_GB",
    "CURRENT_MODELS"
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Emit a detailed startup summary only when DEBUG mode is active.
if config.system.debug_mode:
    logger.info("π§ HASHIRU Config loaded in DEBUG mode")
    logger.info(f"π Free workspace: {config.security.free_project_path}")
    logger.info(f"π― Optimized for: {PRIMARY_GPU} ({config.ollama.hardware.vram_primary_gb}GB) + {SECONDARY_GPU} ({config.ollama.hardware.vram_secondary_gb}GB)")
    logger.info(f"π€ Active models: {CURRENT_MODELS}")
    logger.info(f"β‘ Performance optimizations: Flash Attention, KV Cache, Quantization")

    # Per-model throughput/VRAM estimates, looked up from the registry by
    # model name (only models present in the registry are reported).
    logger.info("π‘ Model Performance Estimates:")
    for model_type, model_name in CURRENT_MODELS.items():
        if model_name in [model.name for model in OptimizedModelRegistry.MODELS.values()]:
            model_config = next(m for m in OptimizedModelRegistry.MODELS.values() if m.name == model_name)
            logger.info(f"  {model_type}: ~{model_config.tokens_per_second_estimate} tokens/s ({model_config.vram_requirement_gb}GB)")

logger.info("π HASHIRU 6.1 Optimized Configuration Ready!")