File size: 2,311 Bytes
6216b58
 
 
 
 
 
 
 
 
 
 
55fc7ef
c4b8a5d
 
 
55fc7ef
 
c4b8a5d
 
 
 
 
 
55fc7ef
 
 
 
 
6216b58
 
 
55fc7ef
 
 
 
c4b8a5d
55fc7ef
 
 
 
6216b58
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4b8a5d
 
55fc7ef
6216b58
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
{
  "model_id": "glm-5.2-visual-runtime",
  "kind": "training_free_multimodal_gateway",
  "checkpoint_repo": null,
  "reasoning_model": {
    "provider": "vllm_openai_compatible",
    "recommended_checkpoint": "zai-org/GLM-5.2-FP8",
    "served_model_name": "glm-5.2",
    "minimum_vllm_version": "0.23.0",
    "recommended_image": "vllm/vllm-openai:glm52"
  },
  "vision_model": {
    "provider": "local_vllm_omni_openai_compatible",
    "recommended_checkpoint": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
    "served_model_name": "qwen3-omni",
    "endpoint": "http://vision-vllm:8000/v1"
  },
  "alternate_reasoning_model": {
    "provider": "vllm_openai_compatible",
    "recommended_checkpoint": "Qwen/Qwen3.6-27B",
    "served_model_name": "qwen3.6-27b",
    "minimum_vllm_version": "0.19.0"
  },
  "ocr_model": {
    "provider": "local_container",
    "implementation": "one_click/ocr",
    "endpoint": "http://ocr:8080/ocr"
  },
  "visual_runtime": {
    "space_repo": "wassemgtk/glm-5-2-visual-runtime-space",
    "public_model_id": "glm-5.2-visual-runtime",
    "default_mode": "local",
    "local_mode_includes": [
      "gateway",
      "zai-org/GLM-5.2-FP8 via vLLM",
      "Qwen/Qwen3-Omni-30B-A3B-Instruct via vLLM-Omni",
      "local OCR container",
      "PostgreSQL",
      "MinIO"
    ],
    "cloud_mode_requires": [
      "GLM_BASE_URL",
      "VISION_BASE_URL",
      "DATABASE_URL",
      "OBJECT_STORE_ENDPOINT"
    ]
  },
  "vllm_arguments": {
    "model": "zai-org/GLM-5.2-FP8",
    "served_model_name": "glm-5.2",
    "kv_cache_dtype": "fp8",
    "tensor_parallel_size": 8,
    "tool_call_parser": "glm47",
    "reasoning_parser": "glm45",
    "enable_auto_tool_choice": true,
    "speculative_config": {
      "method": "mtp",
      "num_speculative_tokens": 5
    }
  },
  "openai_compatible_endpoints": [
    "/v1/chat/completions",
    "/v1/responses"
  ],
  "notes": [
    "This repo intentionally does not include GLM-5.2 weights.",
    "The one-click profile pulls GLM-5.2-FP8 and Qwen3-Omni weights from Hugging Face at deployment time unless models/ has been materialized.",
    "Run scripts/materialize_weights.py and upload models/ if you want all checkpoint weights physically stored in this repo.",
    "No hosted model provider is required when VISUAL_RUNTIME_MODE=local."
  ]
}