Spaces:
Sleeping
Sleeping
Dmitry Beresnev committed on
Commit ·
fe7089d
1
Parent(s): 97d9520
change llm model to qwen2 math
Browse files - Dockerfile +5 -2
Dockerfile
CHANGED
|
@@ -97,8 +97,11 @@ EXPOSE 7860
|
|
| 97 |
# CMD ["llama-server", "-hf", "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
|
| 98 |
# "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "512"]
|
| 99 |
#
|
| 100 |
-
# Example Mixtral-8x7B-Instruct (
|
| 101 |
-
CMD ["llama-server", "-hf", "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF:mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "64", "--ubatch-size", "32"]
|
|
|
|
|
|
|
|
|
|
| 102 |
#
|
| 103 |
# Example Qwen2.5-Coder 7B Instruct (32k context):
|
| 104 |
# CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",
|
|
|
|
| 97 |
# CMD ["llama-server", "-hf", "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
|
| 98 |
# "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "512"]
|
| 99 |
#
|
| 100 |
+
# Example Mixtral-8x7B-Instruct (known loader incompatibilities on newer llama.cpp + too large for 10GB RAM):
|
| 101 |
+
# CMD ["llama-server", "-hf", "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF:mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "64", "--ubatch-size", "32"]
|
| 102 |
+
#
|
| 103 |
+
# Active: Qwen2-Math-7B Q4_K_M
|
| 104 |
+
CMD ["llama-server", "-hf", "QuantFactory/Qwen2-Math-7B-GGUF:q4_k_m", "--host", "0.0.0.0", "--port", "7860", "-c", "8192", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "128", "--ubatch-size", "64"]
|
| 105 |
#
|
| 106 |
# Example Qwen2.5-Coder 7B Instruct (32k context):
|
| 107 |
# CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",
|