Spaces:
Running
Running
Codex
Trim prefetch to 1 pack, log model-pack failures, JSON-constrained cards, rotate fallback names
b483ca7 | import os | |
| import subprocess | |
| from pathlib import Path | |
# Defaults shared by the local AI helpers in this module.

# GGUF weights file handed to llama-server via --model (relative path).
DEFAULT_MINICPM_MODEL_PATH: Path = Path("models/minicpm-v-4.6-gguf/MiniCPM-V-4.6-Q4_K_M.gguf")

# Port the card-authoring llama.cpp server listens on unless overridden.
DEFAULT_CARD_PORT: int = 8090

# NOTE(review): presumably the port of the app itself; not referenced in this
# part of the file -- confirm against the caller.
DEFAULT_APP_PORT: int = 7860

# Model identifier exported as TABRAS_BOSS_MODEL (boss backend is "mlx").
DEFAULT_BOSS_MODEL: str = "mlx-community/Nemotron-Mini-4B-Instruct-4bit-mlx"

# Model identifier exported as TABRAS_ART_MODEL (art backend is "diffusers").
DEFAULT_ART_MODEL: str = "stabilityai/sdxl-turbo"
# Build the OpenAI-compatible chat-completions URL for a server on a local port.
def chat_endpoint(port: int) -> str:
    """Return the loopback chat-completions URL for ``port``.

    Args:
        port: TCP port of the locally running server.

    Returns:
        The ``/v1/chat/completions`` URL on ``127.0.0.1`` at that port.
    """
    base_url = f"http://127.0.0.1:{port}"
    return f"{base_url}/v1/chat/completions"
# Return the llama.cpp server command for the local MiniCPM GGUF.
def minicpm_server_command(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
    *,
    ctx_size: int = 16384,
    parallel: int = 4,
    n_gpu_layers: int = 99,
) -> tuple[str, ...]:
    """Build the ``llama-server`` argument vector for the MiniCPM model.

    The tuning values used to be fixed string literals; they are now
    keyword-only parameters whose defaults reproduce the previous command
    exactly, so existing positional callers are unaffected.

    Args:
        model_path: GGUF file passed to ``--model``.
        port: Port passed to ``--port``; the host is always ``127.0.0.1``.
        ctx_size: Value passed to ``--ctx-size``.
        parallel: Value passed to ``--parallel``.
        n_gpu_layers: Value passed to ``--n-gpu-layers``.
            NOTE(review): the default of 99 presumably means "offload every
            layer" -- confirm against the llama.cpp documentation.

    Returns:
        The command as a tuple of strings, with the program name first.
    """
    return (
        "llama-server",
        "--model",
        str(model_path),
        "--host",
        "127.0.0.1",
        "--port",
        str(port),
        "--ctx-size",
        str(ctx_size),
        "--parallel",
        str(parallel),
        "--n-gpu-layers",
        str(n_gpu_layers),
    )
# Build the environment that forces MiniCPM card authoring and Nemotron boss play.
def local_ai_env(card_port: int = DEFAULT_CARD_PORT) -> dict[str, str]:
    """Return a copy of ``os.environ`` with the local AI settings applied.

    The ``TABRAS_*`` values below take precedence over any variables of the
    same name already present in the environment. ``os.environ`` itself is
    not modified.

    Args:
        card_port: Local port used to build ``TABRAS_CARD_ENDPOINT``.

    Returns:
        A new dict holding the current environment plus the overrides.
    """
    # Card authoring: llama.cpp backend reached over the local chat endpoint.
    card_settings = {
        "TABRAS_CARD_BACKEND": "llamacpp",
        "TABRAS_CARD_ENDPOINT": chat_endpoint(card_port),
        "TABRAS_CARD_MODEL": "minicpm-v-4.6-q4",
        "TABRAS_CARD_TEMPERATURE": "0.7",
        "TABRAS_CARD_MAX_TOKENS": "256",
    }
    # Boss play: MLX backend with the default boss model.
    boss_settings = {
        "TABRAS_AI_BOSS": "1",
        "TABRAS_BOSS_BACKEND": "mlx",
        "TABRAS_BOSS_MODEL": DEFAULT_BOSS_MODEL,
        "TABRAS_BOSS_TEMPERATURE": "0.2",
        "TABRAS_BOSS_MAX_TOKENS": "96",
    }
    # Card art: diffusers backend with the default art model.
    art_settings = {
        "TABRAS_ART_BACKEND": "diffusers",
        "TABRAS_ART_MODEL": DEFAULT_ART_MODEL,
        "TABRAS_ART_STEPS": "4",
        "TABRAS_ART_GUIDANCE": "0.0",
        "TABRAS_ART_WIDTH": "512",
        "TABRAS_ART_HEIGHT": "320",
    }
    # Later mappings win, so the overrides replace inherited values.
    return {**os.environ, **card_settings, **boss_settings, **art_settings}
# Launch the llama.cpp server process that hosts the MiniCPM model.
def start_minicpm_server(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> subprocess.Popen[str]:
    """Spawn the MiniCPM ``llama-server`` process and return its handle.

    Args:
        model_path: GGUF file to serve; it must exist on disk.
        port: Port the server is told to listen on.

    Returns:
        The ``subprocess.Popen`` handle of the started process, opened in
        text mode. The call does not wait for the process to finish.

    Raises:
        FileNotFoundError: If ``model_path`` does not exist.
    """
    if model_path.exists():
        server_argv = minicpm_server_command(model_path, port)
        return subprocess.Popen(server_argv, text=True)
    raise FileNotFoundError(f"MiniCPM model not found: {model_path}")