Upload 3 files
image-caption-llama.cpp-api/image-caption-llama.cpp-api.py
CHANGED
@@ -5,7 +5,7 @@ import time
 import httpx
 from pathlib import Path
 from openai import OpenAI
-
+import re
 
 """
 # Only allow two paths
@@ -18,7 +18,7 @@ BASE_URL = "http://127.0.0.1:21234/v1"
 # Request API key
 API_KEY = "llama.cpp"
 # Target model
-MODEL_NAME = "qwen3.5-
+MODEL_NAME = "qwen3.5-35b-a3b"
 
 # Retry control: 2 means "first attempt + 1 retry on failure"
 MAX_ATTEMPTS = 2
@@ -26,15 +26,15 @@ MAX_ATTEMPTS = 2
 # Standard parameters
 GEN_PARAMS = {
     "max_completion_tokens": 2048,
-    "temperature":
+    "temperature": 1.00,
 }
 
 # Non-standard parameters
 EXTRA_PARAMS = {
-    "repeat_penalty": 1.
-    "top_k":
-    "top_p":
-    "min_p": 0.
+    "repeat_penalty": 1.00,
+    "top_k": 20,
+    "top_p": 0.95,
+    "min_p": 0.00,
     # "typical_p": 0.90,
 }
 
@@ -128,6 +128,19 @@ def process_single_image(img_path):
 
     description = response.choices[0].message.content
     if description:
+
+        # Use a regular expression to remove <think>...</think> and everything inside it
+        # re.DOTALL makes "." also match newlines
+        description = re.sub(
+            r"<think>.*?</think>", "", description, flags=re.DOTALL
+        )
+
+        # Strip whitespace that may remain at the start or end
+        description = description.strip()
+
+        if not description:  # empty after filtering
+            return False, "Result is empty after filtering out the thinking content", None
+
         txt_path = img_path.with_suffix(".txt")
         with open(txt_path, "w", encoding="utf-8") as f:
             f.write(description.strip())
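
For reference, a minimal standalone check of the <think>-stripping behavior added above (the sample string is hypothetical). Without re.DOTALL the pattern would not match a thinking block that spans multiple lines, which is why the flag is passed:

import re

# Hypothetical model output containing a multi-line thinking block.
raw = "<think>\nlooking at colors, composition...\n</think>\nA cat sleeping on a windowsill."

# Same substitution as in the script; re.DOTALL lets ".*?" cross newlines.
clean = re.sub(r"<think>.*?</think>", "", raw, flags=re.DOTALL).strip()

print(clean)  # -> A cat sleeping on a windowsill.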
image-caption-llama.cpp-api/llama-server_vision_gemma4-26b-a4b-it.bat
ADDED
@@ -0,0 +1,174 @@
+:: https://github.com/ggml-org/llama.cpp/releases/latest
+:: https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64
+:: https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64
+
+
+@echo off
+
+:: Fix the console window size (note: limits scrolling back through output)
+:: mode con cols=120 lines=30
+
+:: Window title
+title "LLaMA.cpp_b8672_CUDA-13.1_x64 -- [gemma-4-26B-A4B-it.Q8_0.gguf]"
+
+:: Black background, green text
+color 0a
+
+
+:: Switch to the script's root directory
+set "SCRIPT_DIR=%~dp0"
+cd /d "%SCRIPT_DIR%"
+
+:: Store the working directory in DIR for use in the launch command below
+set "DIR=%SCRIPT_DIR%"
+
+echo %DIR%
+echo.
+
+
+:: Minimize to the taskbar after launch
+:: %1(start /min cmd.exe /c %0 :&exit)
+
+
+:: Model path. --model (-m)
+set "LLAMA_ARG_MODEL=F:\GGUF\mradermacher\Gemma4-26B-A4B-it-GGUF\gemma-4-26B-A4B-it.Q8_0.gguf"
+
+:: Multimodal projector file path. --mmproj (-mm)
+set "LLAMA_ARG_MMPROJ=F:\GGUF\mradermacher\Gemma4-26B-A4B-it-GGUF\gemma-4-26B-A4B-it.mmproj-f16.gguf"
+
+:: Model alias, used by the REST API. --alias (-a)
+set "LLAMA_ARG_ALIAS=gemma4-26b-a4b-it"
+
+:: Listen address. --host
+set "LLAMA_ARG_HOST=0.0.0.0"
+
+:: Listen port. --port
+set "LLAMA_ARG_PORT=21234"
+
+:: Whether to enable the built-in WebUI. --webui, --no-webui
+:: Enabled by default; disable it for pure API serving
+set "LLAMA_ARG_WEBUI=true"
+
+:: API prefix (without a trailing slash). --api-prefix
+:: set "LLAMA_ARG_API_PREFIX=/api"
+
+:: API key(s) used for authentication (separate multiple keys with commas)
+:: set "LLAMA_API_KEY=C437704D-5114-3E5E-92B5-A18CBCB57344-20260226-123356"
+
+:: Number of model layers stored in VRAM. --n-gpu-layers (-ngl)
+:: auto / all / an integer (commonly 99) / 0 to not use the GPU
+:: set "LLAMA_ARG_N_GPU_LAYERS=auto"
+
+:: Prompt context size. --ctx-size (-c)
+:: The default 0 means read it from the model
+set "LLAMA_ARG_CTX_SIZE=65535"
+
+:: Whether to enable Flash Attention. --flash-attn (-fa)
+:: on / off / auto
+set "LLAMA_ARG_FLASH_ATTN=on"
+
+:: Number of CPU threads used during generation (default -1). --threads (-t)
+set "LLAMA_ARG_THREADS=8"
+
+:: Logical maximum batch size. --batch-size (-b)
+set "LLAMA_ARG_BATCH=8192"
+
+:: Physical maximum batch size. --ubatch-size (-ub)
+set "LLAMA_ARG_UBATCH=4096"
+
+:: Whether to offload the KV cache. Enabled by default. --kv-offload (-kvo), --no-kv-offload (-nkvo)
+set "LLAMA_ARG_KV_OFFLOAD=true"
+
+:: Data type of K in the KV cache. Default f16. --cache-type-k (-ctk)
+:: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
+set "LLAMA_ARG_CACHE_TYPE_K=q8_0"
+
+:: Data type of V in the KV cache. Default f16. --cache-type-v (-ctv)
+:: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
+set "LLAMA_ARG_CACHE_TYPE_V=q8_0"
+
+:: Whether to enable prompt caching. --cache-prompt, --no-cache-prompt
+set "LLAMA_ARG_CACHE_PROMPT=true"
+
+:: Force the system to keep the model in RAM instead of swapping it out or compressing it. --mlock
+:: Use together with LLAMA_ARG_MMAP; decide based on the actual model size and available RAM
+set "LLAMA_ARG_MLOCK=true"
+
+:: Whether to use memory mapping. --mmap, --no-mmap
+:: Disabling it forces the whole model into RAM, trading higher RAM use for lower IO latency
+set "LLAMA_ARG_MMAP=false"
+
+:: Top-K sampling. --top-k
+:: 0 disables it
+:: set "LLAMA_ARG_TOP_K=0"
+
+:: Chat template kwargs (JSON string). --chat-template-kwargs
+set "LLAMA_CHAT_TEMPLATE_KWARGS={"enable_thinking": true}"
+
+:: Token budget for reasoning. --reasoning-budget
+:: -1 = unlimited thinking budget; 0 = disable thinking
+set "LLAMA_ARG_THINK_BUDGET=-1"
+
+:: Whether and how thinking tags are extracted from the response, and in what format they are returned. --reasoning-format
+:: - none: return the thinking content verbatim inside `message.content`
+:: - deepseek: put the thinking content into `message.reasoning_content`
+:: - deepseek-legacy: keep the `<think>` tags in `message.content` while also copying the thinking content to `message.reasoning_content`
+:: Default: auto
+:: set "LLAMA_ARG_THINK=none"
+
+:: Whether to use the Jinja template engine for chat. Default true. --jinja, --no-jinja
+set "LLAMA_ARG_JINJA=true"
+
+:: Custom Jinja chat template file (or a name picked from the built-in list). --chat-template-file
+:: Default: the template read from the model's metadata
+:: chatml deepseek3 gemma gpt-oss bailing-think ...
+:: set "LLAMA_ARG_CHAT_TEMPLATE_FILE=chatml"
+
+:: Number of parallel slots (how many requests are served at once; uses more resources). --parallel (-np)
+:: The LLAMA_ARG_CTX_SIZE context is split evenly across the slots
+set "LLAMA_ARG_N_PARALLEL=1"
+
+:: Whether to auto-fit unset parameters to the available device memory. Default on. --fit (-fit)
+:: set "LLAMA_ARG_FIT=off"
+
+:: Offline mode: force use of the cache and forbid network access. --offline
+set "LLAMA_OFFLINE=true"
+
+:: Prefix log messages with timestamps. --log-timestamps
+set "LLAMA_LOG_TIMESTAMPS=true"
+
+
+@REM --lora: LoRA adapter path (separate multiple adapters with commas)
+
+@REM --repeat-penalty: repetition penalty. 1.05~1.12 is recommended: the effect is noticeable while damage to proper nouns stays small. Default 1.0 disables it.
+@REM --repeat-last-n: window size for the repetition penalty; only the last N tokens are penalized. Default 64.
+@REM   Recommended: 128~256; for long-form output consider 256 or higher. -1 uses the full context size; 0 disables it.
+@REM   Works together with --repeat-penalty: a larger window has a stronger effect but can hurt fluency.
+@REM --presence-penalty: presence penalty (encourages new topics). Default 0.0 disables it.
+@REM --temp: temperature; scales the probability distribution. Above 1.0 is more random/creative, below 1.0 more deterministic. Default 0.8.
+@REM   Suggested: code/math/strict tasks 0.1~0.3; general chat 0.7~0.9; literary/creative writing 1.0~1.2.
+@REM --top-p: nucleus sampling. Default 0.95; 1.00 disables it.
+@REM --top-k: sample only from the K most likely tokens. Default 40; 0 disables it.
+@REM --min-p: minimum-probability sampling (threshold relative to the most likely token). Default 0.05; 0.0 disables it.
+@REM   More stable and robust than top-p at high temperatures; recommended on its own.
+@REM --typical / --typical-p: Locally Typical Sampling. Default 1.0 disables it.
+@REM   Gives richer wording in creative writing; disable it (set 1.0) for strict tasks.
+@REM   When combining Typical 0.95 + Min-P 0.05, keep the temperature at 0.6 or below.
+
+@REM --prio: process priority. 0 normal, 1 medium, 2 high, 3 realtime, -1 low. Default 0.
+
+@REM Verbose logging: -v, --verbose, --log-verbose
+@REM Log threshold: -lv, --verbosity, --log-verbosity N. Default 3. 0:generic output / 1:error / 2:warning / 3:info / 4:debug
+
+
+%DIR%\bin\llama-server.exe ^
+--temp 1.00 ^
+--min-p 0.00 ^
+--top-k 64 ^
+--top-p 0.95 ^
+--repeat-penalty 1.00 ^
+--presence-penalty 0.00 ^
+--prio 2
+
+
+pause
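
The server started by this script speaks the OpenAI-compatible API on port 21234, so the caption script's two parameter dictionaries map onto a single request. A minimal sketch of such a call, assuming the alias and sampler values from the files in this commit (image content omitted for brevity); the llama.cpp-only samplers travel in extra_body because the OpenAI SDK has no named arguments for them:

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:21234/v1", api_key="llama.cpp")

response = client.chat.completions.create(
    model="gemma4-26b-a4b-it",  # LLAMA_ARG_ALIAS set in the .bat above
    messages=[{"role": "user", "content": "Describe this image."}],
    # Standard parameters (GEN_PARAMS in the caption script)
    max_completion_tokens=2048,
    temperature=1.00,
    # llama.cpp-specific samplers (EXTRA_PARAMS), passed through verbatim
    extra_body={"repeat_penalty": 1.00, "top_k": 64, "top_p": 0.95, "min_p": 0.00},
)
print(response.choices[0].message.content)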
image-caption-llama.cpp-api/llama-server_vision_qwen3.5-35b-a3b-base.bat
ADDED
@@ -0,0 +1,181 @@
+:: https://github.com/ggml-org/llama.cpp/releases/latest
+:: https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64
+:: https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64
+
+
+@echo off
+
+:: Fix the console window size (note: limits scrolling back through output)
+:: mode con cols=120 lines=30
+
+:: Window title
+title "LLaMA.cpp_b8672_CUDA-13.1_x64 -- [Qwen3.5-35B-A3B-Base.Q8_0.gguf]"
+
+:: Black background, green text
+color 0a
+
+
+:: Switch to the script's root directory
+set "SCRIPT_DIR=%~dp0"
+cd /d "%SCRIPT_DIR%"
+
+:: Store the working directory in DIR for use in the launch command below
+set "DIR=%SCRIPT_DIR%"
+
+echo %DIR%
+echo.
+
+
+:: Minimize to the taskbar after launch
+:: %1(start /min cmd.exe /c %0 :&exit)
+
+
+:: Model path. --model (-m)
+set "LLAMA_ARG_MODEL=F:\GGUF\mradermacher\Qwen3.5-35B-A3B-Base-GGUF\Qwen3.5-35B-A3B-Base.Q8_0.gguf"
+
+:: Multimodal projector file path. --mmproj (-mm)
+set "LLAMA_ARG_MMPROJ=F:\GGUF\mradermacher\Qwen3.5-35B-A3B-Base-GGUF\Qwen3.5-35B-A3B-Base.mmproj-f16.gguf"
+
+:: Model alias, used by the REST API. --alias (-a)
+set "LLAMA_ARG_ALIAS=qwen3.5-35b-a3b-base"
+
+:: Listen address. --host
+set "LLAMA_ARG_HOST=0.0.0.0"
+
+:: Listen port. --port
+set "LLAMA_ARG_PORT=21234"
+
+:: Whether to enable the built-in WebUI. --webui, --no-webui
+:: Enabled by default; disable it for pure API serving
+set "LLAMA_ARG_WEBUI=true"
+
+:: API prefix (without a trailing slash). --api-prefix
+:: set "LLAMA_ARG_API_PREFIX=/api"
+
+:: API key(s) used for authentication (separate multiple keys with commas)
+:: set "LLAMA_API_KEY=C437704D-5114-3E5E-92B5-A18CBCB57344-20260226-123356"
+
+:: Number of model layers stored in VRAM. --n-gpu-layers (-ngl)
+:: auto / all / an integer (commonly 99) / 0 to not use the GPU
+:: set "LLAMA_ARG_N_GPU_LAYERS=auto"
+
+:: Prompt context size. --ctx-size (-c)
+:: The default 0 means read it from the model
+set "LLAMA_ARG_CTX_SIZE=65535"
+
+:: Whether to enable Flash Attention. --flash-attn (-fa)
+:: on / off / auto
+set "LLAMA_ARG_FLASH_ATTN=on"
+
+:: Number of CPU threads used during generation (default -1). --threads (-t)
+set "LLAMA_ARG_THREADS=8"
+
+:: Logical maximum batch size. --batch-size (-b)
+set "LLAMA_ARG_BATCH=8192"
+
+:: Physical maximum batch size. --ubatch-size (-ub)
+set "LLAMA_ARG_UBATCH=4096"
+
+:: Whether to offload the KV cache. Enabled by default. --kv-offload (-kvo), --no-kv-offload (-nkvo)
+set "LLAMA_ARG_KV_OFFLOAD=true"
+
+:: Data type of K in the KV cache. Default f16. --cache-type-k (-ctk)
+:: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
+set "LLAMA_ARG_CACHE_TYPE_K=q8_0"
+
+:: Data type of V in the KV cache. Default f16. --cache-type-v (-ctv)
+:: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
+set "LLAMA_ARG_CACHE_TYPE_V=q8_0"
+
+:: Whether to enable prompt caching. --cache-prompt, --no-cache-prompt
+set "LLAMA_ARG_CACHE_PROMPT=true"
+
+:: Force the system to keep the model in RAM instead of swapping it out or compressing it. --mlock
+:: Use together with LLAMA_ARG_MMAP; decide based on the actual model size and available RAM
+set "LLAMA_ARG_MLOCK=true"
+
+:: Whether to use memory mapping. --mmap, --no-mmap
+:: Disabling it forces the whole model into RAM, trading higher RAM use for lower IO latency
+set "LLAMA_ARG_MMAP=false"
+
+:: Top-K sampling. --top-k
+:: 0 disables it
+:: set "LLAMA_ARG_TOP_K=0"
+
+:: Chat template kwargs (JSON string). --chat-template-kwargs
+set "LLAMA_CHAT_TEMPLATE_KWARGS={"enable_thinking": true}"
+
+:: Token budget for reasoning. --reasoning-budget
+:: -1 = unlimited thinking budget; 0 = disable thinking
+set "LLAMA_ARG_THINK_BUDGET=-1"
+
+:: Whether and how thinking tags are extracted from the response, and in what format they are returned. --reasoning-format
+:: - none: return the thinking content verbatim inside `message.content`
+:: - deepseek: put the thinking content into `message.reasoning_content`
+:: - deepseek-legacy: keep the `<think>` tags in `message.content` while also copying the thinking content to `message.reasoning_content`
+:: Default: auto
+:: set "LLAMA_ARG_THINK=none"
+
+:: Whether to use the Jinja template engine for chat. Default true. --jinja, --no-jinja
+set "LLAMA_ARG_JINJA=true"
+
+:: Custom Jinja chat template file (or a name picked from the built-in list). --chat-template-file
+:: Default: the template read from the model's metadata
+:: chatml deepseek3 gemma gpt-oss bailing-think ...
+:: set "LLAMA_ARG_CHAT_TEMPLATE_FILE=chatml"
+
+:: Number of parallel slots (how many requests are served at once; uses more resources). --parallel (-np)
+:: The LLAMA_ARG_CTX_SIZE context is split evenly across the slots
+set "LLAMA_ARG_N_PARALLEL=1"
+
+:: Whether to auto-fit unset parameters to the available device memory. Default on. --fit (-fit)
+:: set "LLAMA_ARG_FIT=off"
+
+:: Offline mode: force use of the cache and forbid network access. --offline
+set "LLAMA_OFFLINE=true"
+
+:: Prefix log messages with timestamps. --log-timestamps
+set "LLAMA_LOG_TIMESTAMPS=true"
+
+
+@REM --lora: LoRA adapter path (separate multiple adapters with commas)
+
+@REM --repeat-penalty: repetition penalty. 1.05~1.12 is recommended: the effect is noticeable while damage to proper nouns stays small. Default 1.0 disables it.
+@REM --repeat-last-n: window size for the repetition penalty; only the last N tokens are penalized. Default 64.
+@REM   Recommended: 128~256; for long-form output consider 256 or higher. -1 uses the full context size; 0 disables it.
+@REM   Works together with --repeat-penalty: a larger window has a stronger effect but can hurt fluency.
+@REM --presence-penalty: presence penalty (encourages new topics). Default 0.0 disables it.
+@REM --temp: temperature; scales the probability distribution. Above 1.0 is more random/creative, below 1.0 more deterministic. Default 0.8.
+@REM   Suggested: code/math/strict tasks 0.1~0.3; general chat 0.7~0.9; literary/creative writing 1.0~1.2.
+@REM --top-p: nucleus sampling. Default 0.95; 1.00 disables it.
+@REM --top-k: sample only from the K most likely tokens. Default 40; 0 disables it.
+@REM --min-p: minimum-probability sampling (threshold relative to the most likely token). Default 0.05; 0.0 disables it.
+@REM   More stable and robust than top-p at high temperatures; recommended on its own.
+@REM --typical / --typical-p: Locally Typical Sampling. Default 1.0 disables it.
+@REM   Gives richer wording in creative writing; disable it (set 1.0) for strict tasks.
+@REM   When combining Typical 0.95 + Min-P 0.05, keep the temperature at 0.6 or below.
+
+@REM --prio: process priority. 0 normal, 1 medium, 2 high, 3 realtime, -1 low. Default 0.
+
+@REM Verbose logging: -v, --verbose, --log-verbose
+@REM Log threshold: -lv, --verbosity, --log-verbosity N. Default 3. 0:generic output / 1:error / 2:warning / 3:info / 4:debug
+
+
+:: Officially recommended sampling parameters for the Qwen3.5 series
+@REM Thinking mode for general tasks: temperature=1.0, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
+@REM Thinking mode for precise coding tasks (e.g. WebDev): temperature=0.6, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=0.0, repetition_penalty=1.0
+@REM Instruct (or non-thinking) mode for general tasks: temperature=0.7, top_p=0.8, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
+@REM Instruct (or non-thinking) mode for reasoning tasks: temperature=1.0, top_p=0.95, top_k=20, min_p=0.0, presence_penalty=1.5, repetition_penalty=1.0
+
+
+%DIR%\bin\llama-server.exe ^
+--temp 1.00 ^
+--min-p 0.00 ^
+--top-k 20 ^
+--top-p 0.95 ^
+--repeat-penalty 1.00 ^
+--presence-penalty 1.50 ^
+--prio 2
+
+
+pause
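
Before starting a captioning run, a quick smoke test can confirm the server launched by either .bat is reachable. A sketch using the httpx dependency the caption script already imports, with the port taken from the scripts above:

import httpx

# GET /v1/models on the llama-server port configured above.
r = httpx.get("http://127.0.0.1:21234/v1/models")
r.raise_for_status()

# The "id" of each entry is the serving alias (LLAMA_ARG_ALIAS);
# the caption script's MODEL_NAME must match one of these ids.
for m in r.json()["data"]:
    print(m["id"])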