vost committed on
Commit
7881d19
·
verified ·
1 Parent(s): e63f7e9

Delete config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +0 -161
config.yaml DELETED
@@ -1,161 +0,0 @@
1
- healthCheckTimeout: 1500
2
- startPort: 65001
3
- ttl: 1200
4
-
5
- # --- Macros Globais ---
6
- macros:
7
- "base_vRAM": >-
8
- --privileged --label ai-type=worker --ulimit memlock=-1 --ulimit stack=67108864
9
- --device /dev/dri:/dev/dri
10
- --device /dev/accel/accel0:/dev/accel/accel0
11
- -v /home/lvivas/Modelos/models:/root/.cache/
12
- -v /tmp/cache_file:/tmp/cache_file
13
- -v /home/lvivas/Modelos/grammar:/tmp/grammar
14
- -e ZES_ENABLE_SYSMAN=1 -e GGML_SYCL_UNIFIED_SHARED_MEMORY=1 -e GGML_SYCL_OVERRIDE_ALLOCATOR=1
15
-
16
- "base_Opts": "--ctx-size 32768 --no-webui --gpu-layers 99 --fit on -t 10 --prio 3 --poll 25 --perf --mlock --no-mmap --split-mode none --flash-attn on"
17
-
18
- "kv_cache_opt": >-
19
- --cache-type-k q8_0 --cache-type-v q8_0 --swa-full
20
- --ctx-checkpoints 10 --batch-size 4096 --ubatch-size 1024
21
-
22
- "draft_qwen_08": >-
23
- -hfd mradermacher/Huihui-Qwen3.5-0.8B-abliterated-GGUF:Q5_K_M
24
- --draft 5 -ctkd q8_0 -ctvd q8_0
25
-
26
- "run_intel": >-
27
- -e ONEAPI_DEVICE_SELECTOR=level_zero:0
28
- ghcr.io/ggml-org/llama.cpp:server-intel
29
-
30
- #Last Stable b8445
31
- "run_vulkan": >-
32
- ghcr.io/ggml-org/llama.cpp:server-vulkan
33
-
34
- "run_test": >-
35
- ghcr.io/ggml-org/llama.cpp:server-vulkan
36
-
37
- "run_openvino": >-
38
- -e GGML_OPENVINO_DEVICE=GPU
39
- -e GGML_OPENVINO_PREFILL_CHUNK_SIZE=256
40
- ghcr.io/ggml-org/llama.cpp:server-openvino
41
-
42
- "chat_default": >-
43
- ${kv_cache_opt} ${base_Opts} --reasoning off
44
- --temp 0.6 --top-p 0.95 --top-k 50 --min-p 0.05
45
- --repeat-penalty 1.05 --presence-penalty 1.0 --repeat-last-n 1024
46
- --parallel 2
47
-
48
- "code_default": >-
49
- ${base_Opts} ${kv_cache_opt} --reasoning on
50
- --temp 0.2 --top-p 0.95 --top-k 20 --min-p 0.00
51
- --repeat-penalty 1.15 --parallel 1
52
-
53
- # --- Grupos ---
54
- groups:
55
- "Coding":
56
- swap: false
57
- exclusive: true
58
- members:
59
- - "Qwen3.5-9B"
60
- - "nomic-embed-text-v1.5"
61
-
62
- "Chat":
63
- swap: false
64
- exclusive: true
65
- members:
66
- - "JOSIE-4B"
67
- - "Darkidol-Ballad-9B"
68
-
69
- hooks:
70
- on_startup:
71
- preload:
72
- - "Darkidol-Ballad-9B"
73
-
74
- # --- Modelos ---
75
- models:
76
- "nomic-embed-text-v1.5":
77
- proxy: "http://127.0.0.1:${PORT}"
78
- cmd: |
79
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name nomic-V
80
- ${run_vulkan}
81
- -hf nomic-ai/nomic-embed-text-v1.5-GGUF:Q4_K_M
82
- --embeddings --ctx-size 8192 --gpu-layers 99 --parallel 1 --flash-attn on
83
- cmdStop: "docker stop nomic-V"
84
-
85
- "JOSIE-4B":
86
- proxy: "http://127.0.0.1:${PORT}"
87
- cmd: |
88
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name JOSIE
89
- ${run_openvino}
90
- -hf mradermacher/JOSIE-4B-Instruct-GGUF:Q4_K_M
91
- ${chat_default}
92
- cmdStop: "docker stop JOSIE"
93
-
94
- "Qwen3.5-9B":
95
- proxy: "http://127.0.0.1:${PORT}"
96
- cmd: |
97
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name Qwen3.5-9B
98
- ${run_intel}
99
- -hf mradermacher/Qwen3.5-9B-ultra-heretic-GGUF:Q4_K_M
100
- ${code_default} --seed 3407
101
- cmdStop: "docker stop Qwen3.5-9B"
102
-
103
- "Character-Creator":
104
- proxy: "http://127.0.0.1:${PORT}"
105
- cmd: |
106
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name Character-Creator
107
- ${run_vulkan}
108
- -hf mradermacher/Llama-3.3-8B-Character-Creator-V2-GGUF:Q4_K_M
109
- ${chat_default}
110
- cmdStop: "docker stop Character-Creator"
111
-
112
- "Impish_Bloodmoon_12B":
113
- proxy: "http://127.0.0.1:${PORT}"
114
- cmd: |
115
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name Impish_Bloodmoon_12B
116
- ${run_vulkan}
117
- -hf SicariusSicariiStuff/Impish_Bloodmoon_12B_GGUF:Q4_K_M
118
- ${chat_default}
119
- cmdStop: "docker stop Impish_Bloodmoon_12B"
120
-
121
- "Darkidol-Ballad-9B":
122
- proxy: "http://127.0.0.1:${PORT}"
123
- cmd: |
124
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name Darkidol-Ballad-9B
125
- ${run_vulkan} -hf mradermacher/Darkidol-Ballad-9B-GGUF:Q5_K_M
126
- ${chat_default} ${draft_qwen_08}
127
- cmdStop: "docker stop Darkidol-Ballad-9B"
128
-
129
- "Darkidol-Ballad-9B-I":
130
- proxy: "http://127.0.0.1:${PORT}"
131
- cmd: |
132
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name Darkidol-Ballad-9B
133
- ${run_intel}
134
- -hf mradermacher/Darkidol-Ballad-9B-GGUF:Q4_K_M
135
- ${chat_default}
136
- cmdStop: "docker stop Darkidol-Ballad-9B"
137
-
138
- "TEST-MODELS":
139
- proxy: "http://127.0.0.1:${PORT}"
140
- cmd: |
141
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name TEST-MODELS
142
- ${run_test}
143
- -hf ReadyArt/Omega-Evolution-9B-v2.0-GGUF:Q5_K_M
144
- ${draft_qwen_08}
145
- --gpu-layers 99 --ctx-size 8192 --no-context-shift
146
- -np 1 --flash-attn on --fit on --swa-full
147
- --mlock --no-mmap -b 4096 --ubatch-size 1024
148
- --temp 0.6 --min-p 0.05 --reasoning off --no-warmup
149
- --grammar-file /tmp/grammar/strict_xml.gbnf
150
-
151
- cmdStop: "docker stop TEST-MODELS"
152
-
153
- "00-HELP":
154
- proxy: "http://127.0.0.1:${PORT}"
155
- cmd: |
156
- docker run --pull always --rm ${base_vRAM} -p ${PORT}:8080 --name I00-HELP
157
- ${run_test} -h
158
- cmdStop: "docker stop I00-HELP"
159
-
160
- filters:
161
- stripParams: "top_p, top_k, min_p, presence_penalty, frequency_penalty, repeat_last"