tabras / ai_runtime.py
Codex
Trim prefetch to 1 pack, log model-pack failures, JSON-constrained cards, rotate fallback names
b483ca7
Raw
History Blame Contribute Delete
2.29 kB
import os
import subprocess
from pathlib import Path
# Relative location of the Q4_K_M-quantised MiniCPM-V GGUF file.
DEFAULT_MINICPM_MODEL_PATH = Path("models") / "minicpm-v-4.6-gguf" / "MiniCPM-V-4.6-Q4_K_M.gguf"
# Default port for the card-model server (default of minicpm_server_command / local_ai_env).
DEFAULT_CARD_PORT = 8090
# NOTE(review): not referenced in the visible part of this file; presumably the
# port of the application front end -- confirm against the caller.
DEFAULT_APP_PORT = 7860
# Model identifier exported as TABRAS_BOSS_MODEL.
DEFAULT_BOSS_MODEL = "mlx-community/Nemotron-Mini-4B-Instruct-4bit-mlx"
# Model identifier exported as TABRAS_ART_MODEL.
DEFAULT_ART_MODEL = "stabilityai/sdxl-turbo"
# Build the OpenAI-compatible chat-completions URL for a server listening on
# the loopback address at the given port.
def chat_endpoint(port: int) -> str:
    base_url = f"http://127.0.0.1:{port}"
    return base_url + "/v1/chat/completions"
# Assemble the argv tuple that runs llama-server bound to 127.0.0.1 with the
# given GGUF model file and port. The remaining options (--ctx-size,
# --parallel, --n-gpu-layers) are fixed values.
def minicpm_server_command(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> tuple[str, ...]:
    # Flag/value pairs; flattened below in exactly this order.
    options = (
        ("--model", str(model_path)),
        ("--host", "127.0.0.1"),
        ("--port", str(port)),
        ("--ctx-size", "16384"),
        ("--parallel", "4"),
        ("--n-gpu-layers", "99"),
    )
    return ("llama-server", *(token for pair in options for token in pair))
# Produce a copy of the current process environment with the TABRAS_* settings
# below layered on top: llama.cpp card backend pointed at card_port, MLX boss
# model, and diffusers art model. Any value already set under one of these
# keys is replaced; os.environ itself is not modified.
def local_ai_env(card_port: int = DEFAULT_CARD_PORT) -> dict[str, str]:
    card_settings = {
        "TABRAS_CARD_BACKEND": "llamacpp",
        "TABRAS_CARD_ENDPOINT": chat_endpoint(card_port),
        "TABRAS_CARD_MODEL": "minicpm-v-4.6-q4",
        "TABRAS_CARD_TEMPERATURE": "0.7",
        "TABRAS_CARD_MAX_TOKENS": "256",
    }
    boss_settings = {
        "TABRAS_AI_BOSS": "1",
        "TABRAS_BOSS_BACKEND": "mlx",
        "TABRAS_BOSS_MODEL": DEFAULT_BOSS_MODEL,
        "TABRAS_BOSS_TEMPERATURE": "0.2",
        "TABRAS_BOSS_MAX_TOKENS": "96",
    }
    art_settings = {
        "TABRAS_ART_BACKEND": "diffusers",
        "TABRAS_ART_MODEL": DEFAULT_ART_MODEL,
        "TABRAS_ART_STEPS": "4",
        "TABRAS_ART_GUIDANCE": "0.0",
        "TABRAS_ART_WIDTH": "512",
        "TABRAS_ART_HEIGHT": "320",
    }
    # Later mappings win, so the forced settings take precedence over the
    # inherited environment.
    return {**os.environ, **card_settings, **boss_settings, **art_settings}
# Launch the MiniCPM llama.cpp server as a child process and return its handle.
#
# model_path: GGUF model file to serve. Plain strings / os.PathLike values are
#     accepted and converted to Path.
# port: port passed through to minicpm_server_command.
# Returns: the subprocess.Popen object (text mode). The process is not waited
#     on here.
# Raises: FileNotFoundError when model_path is not an existing regular file.
#     subprocess.Popen may additionally raise OSError if the llama-server
#     executable itself cannot be started.
def start_minicpm_server(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> subprocess.Popen[str]:
    # Normalise so that str arguments do not fail on the attribute access below.
    model_path = Path(model_path)
    # is_file() rather than exists(): a directory at this path would otherwise
    # pass the check and only fail later inside the spawned server process.
    if not model_path.is_file():
        raise FileNotFoundError(f"MiniCPM model not found: {model_path}")
    return subprocess.Popen(minicpm_server_command(model_path, port), text=True)