import os
import subprocess
from pathlib import Path

# Default locations, ports, and model identifiers used by the helpers below.
DEFAULT_MINICPM_MODEL_PATH = Path("models/minicpm-v-4.6-gguf/MiniCPM-V-4.6-Q4_K_M.gguf")
DEFAULT_CARD_PORT = 8090
DEFAULT_APP_PORT = 7860
DEFAULT_BOSS_MODEL = "mlx-community/Nemotron-Mini-4B-Instruct-4bit-mlx"
DEFAULT_ART_MODEL = "stabilityai/sdxl-turbo"


# Return the OpenAI-compatible chat endpoint for a local port.
def chat_endpoint(port: int) -> str:
    """Build the loopback chat-completions URL for ``port``.

    Args:
        port: TCP port the local server listens on.

    Returns:
        ``http://127.0.0.1:<port>/v1/chat/completions``.
    """
    return f"http://127.0.0.1:{port}/v1/chat/completions"


# Return the llama.cpp server command for the local MiniCPM GGUF.
def minicpm_server_command(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> tuple[str, ...]:
    """Build the ``llama-server`` argument vector without running it.

    The server is bound to 127.0.0.1 and is always given the fixed flags
    ``--ctx-size 16384``, ``--parallel 4`` and ``--n-gpu-layers 99``.

    Args:
        model_path: Path passed to ``--model``; converted with ``str()``.
        port: Port passed to ``--port``.

    Returns:
        The command as a tuple of strings, suitable for ``subprocess`` calls
        without a shell.
    """
    return (
        "llama-server",
        "--model",
        str(model_path),
        "--host",
        "127.0.0.1",
        "--port",
        str(port),
        "--ctx-size",
        "16384",
        "--parallel",
        "4",
        "--n-gpu-layers",
        "99",
    )


# Return the environment that forces MiniCPM card authoring and Nemotron boss play.
def local_ai_env(card_port: int = DEFAULT_CARD_PORT) -> dict[str, str]:
    """Return a copy of the process environment with the ``TABRAS_*`` overrides.

    ``os.environ`` itself is not modified; the overrides replace any values
    of the same name that were already set in the inherited environment.

    Args:
        card_port: Port used to derive ``TABRAS_CARD_ENDPOINT`` through
            ``chat_endpoint``.

    Returns:
        A new ``dict`` holding the inherited variables plus the card, boss,
        and art settings listed below.
    """
    env = dict(os.environ)
    env.update(
        {
            "TABRAS_CARD_BACKEND": "llamacpp",
            "TABRAS_CARD_ENDPOINT": chat_endpoint(card_port),
            "TABRAS_CARD_MODEL": "minicpm-v-4.6-q4",
            "TABRAS_CARD_TEMPERATURE": "0.7",
            "TABRAS_CARD_MAX_TOKENS": "256",
            "TABRAS_AI_BOSS": "1",
            "TABRAS_BOSS_BACKEND": "mlx",
            "TABRAS_BOSS_MODEL": DEFAULT_BOSS_MODEL,
            "TABRAS_BOSS_TEMPERATURE": "0.2",
            "TABRAS_BOSS_MAX_TOKENS": "96",
            "TABRAS_ART_BACKEND": "diffusers",
            "TABRAS_ART_MODEL": DEFAULT_ART_MODEL,
            "TABRAS_ART_STEPS": "4",
            "TABRAS_ART_GUIDANCE": "0.0",
            "TABRAS_ART_WIDTH": "512",
            "TABRAS_ART_HEIGHT": "320",
        }
    )
    return env


# Start the MiniCPM llama.cpp server process.
def start_minicpm_server(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> subprocess.Popen[str]:
    """Spawn ``llama-server`` for the MiniCPM model and return its handle.

    The function returns as soon as the process is spawned; it does not wait
    for the server to start accepting requests, and it leaves stopping the
    process to the caller.

    Args:
        model_path: Location of the GGUF model file. A plain string path is
            accepted as well as a ``Path``.
        port: Port the server should listen on.

    Returns:
        The ``subprocess.Popen`` object for the started server.

    Raises:
        FileNotFoundError: If ``model_path`` does not point to an existing
            regular file (missing path, or a directory).
    """
    # Normalise so string paths work with the check below.
    model_path = Path(model_path)
    # is_file() rather than exists(): a directory at this path would pass an
    # existence check and only fail later inside the spawned server.
    if not model_path.is_file():
        raise FileNotFoundError(f"MiniCPM model not found: {model_path}")
    return subprocess.Popen(minicpm_server_command(model_path, port), text=True)