File size: 2,286 Bytes
6bbf552
 
 
 
 
 
 
 
 
a8afc36
6bbf552
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8afc36
 
 
6bbf552
 
 
 
 
 
 
 
 
 
 
 
 
a8afc36
b483ca7
6bbf552
 
 
 
 
a8afc36
 
 
 
 
 
6bbf552
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import os
import subprocess
from pathlib import Path


# Default location of the MiniCPM GGUF weights. The path is relative, so it is
# resolved against the current working directory of the process that uses it.
DEFAULT_MINICPM_MODEL_PATH = Path("models/minicpm-v-4.6-gguf/MiniCPM-V-4.6-Q4_K_M.gguf")
# Default port for the llama.cpp server command and for the card endpoint URL
# exported in TABRAS_CARD_ENDPOINT.
DEFAULT_CARD_PORT = 8090
# NOTE(review): not referenced in this part of the file; presumably the port of
# the application UI -- confirm against the caller.
DEFAULT_APP_PORT = 7860
# Model identifier exported as TABRAS_BOSS_MODEL by local_ai_env().
DEFAULT_BOSS_MODEL = "mlx-community/Nemotron-Mini-4B-Instruct-4bit-mlx"
# Model identifier exported as TABRAS_ART_MODEL by local_ai_env().
DEFAULT_ART_MODEL = "stabilityai/sdxl-turbo"


# Build the loopback URL of the /v1/chat/completions route served on `port`.
def chat_endpoint(port: int) -> str:
    origin = "http://127.0.0.1"
    route = "/v1/chat/completions"
    return f"{origin}:{port}{route}"


# Assemble the argv tuple that launches `llama-server` for the MiniCPM GGUF.
# The server is bound to 127.0.0.1 on `port` and loads `model_path`; the
# remaining flags are fixed tuning values.
def minicpm_server_command(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> tuple[str, ...]:
    # Insertion order of this mapping is the order of the flags on the command line.
    options = {
        "--model": str(model_path),
        "--host": "127.0.0.1",
        "--port": str(port),
        "--ctx-size": "16384",
        "--parallel": "4",
        "--n-gpu-layers": "99",
    }
    argv = ["llama-server"]
    for flag, value in options.items():
        argv.append(flag)
        argv.append(value)
    return tuple(argv)


# Build a child-process environment: a copy of the current process environment
# overlaid with the TABRAS_* settings for the card, boss and art backends.
# The card endpoint points at the local chat URL for `card_port`.
def local_ai_env(card_port: int = DEFAULT_CARD_PORT) -> dict[str, str]:
    card_settings = {
        "TABRAS_CARD_BACKEND": "llamacpp",
        "TABRAS_CARD_ENDPOINT": chat_endpoint(card_port),
        "TABRAS_CARD_MODEL": "minicpm-v-4.6-q4",
        "TABRAS_CARD_TEMPERATURE": "0.7",
        "TABRAS_CARD_MAX_TOKENS": "256",
    }
    boss_settings = {
        "TABRAS_AI_BOSS": "1",
        "TABRAS_BOSS_BACKEND": "mlx",
        "TABRAS_BOSS_MODEL": DEFAULT_BOSS_MODEL,
        "TABRAS_BOSS_TEMPERATURE": "0.2",
        "TABRAS_BOSS_MAX_TOKENS": "96",
    }
    art_settings = {
        "TABRAS_ART_BACKEND": "diffusers",
        "TABRAS_ART_MODEL": DEFAULT_ART_MODEL,
        "TABRAS_ART_STEPS": "4",
        "TABRAS_ART_GUIDANCE": "0.0",
        "TABRAS_ART_WIDTH": "512",
        "TABRAS_ART_HEIGHT": "320",
    }
    # Later mappings win, so the TABRAS_* values override anything inherited.
    return {**os.environ, **card_settings, **boss_settings, **art_settings}


# Start the MiniCPM llama.cpp server process.
#
# model_path: location of the GGUF model file. A plain string is tolerated as
#     well as a Path, matching minicpm_server_command(), which only calls
#     str() on it.
# port: port passed to the server command.
# Returns the Popen handle of the spawned server; the caller owns the process
#     and is responsible for terminating it.
# Raises FileNotFoundError when model_path does not point at a regular file.
def start_minicpm_server(
    model_path: Path = DEFAULT_MINICPM_MODEL_PATH,
    port: int = DEFAULT_CARD_PORT,
) -> subprocess.Popen[str]:
    model_file = Path(model_path)
    # is_file() rather than exists(): a directory at this path would pass an
    # existence check and only fail later, inside the server process.
    if not model_file.is_file():
        raise FileNotFoundError(f"MiniCPM model not found: {model_path}")
    return subprocess.Popen(minicpm_server_command(model_file, port), text=True)