Dmitry Beresnev committed on
Commit
4f2dffc
·
1 Parent(s): cca3c7b

change llm model

Browse files
Files changed (1) hide show
  1. Dockerfile +2 -2
Dockerfile CHANGED
@@ -100,8 +100,8 @@ EXPOSE 7860
100
  # Example Mixtral-8x7B-Instruct (known loader incompatibilities on newer llama.cpp + too large for 10GB RAM):
101
  # CMD ["llama-server", "-hf", "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF:mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "64", "--ubatch-size", "32"]
102
  #
103
- # Active: Qwen2.5-Math-7B-Instruct Q4_K_M (math-focused, 8k target)
104
- CMD ["llama-server", "--hf-repo", "tensorblock/Qwen2.5-Math-7B-Instruct-GGUF", "--hf-file", "Qwen2.5-Math-7B-Instruct-Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "8192", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "128", "--ubatch-size", "64"]
105
  #
106
  # Example Qwen2.5-Coder 7B Instruct (32k context):
107
  # CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",
 
100
  # Example Mixtral-8x7B-Instruct (known loader incompatibilities on newer llama.cpp + too large for 10GB RAM):
101
  # CMD ["llama-server", "-hf", "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF:mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "64", "--ubatch-size", "32"]
102
  #
103
+ # Active: QuantFactory Qwen2.5-Math-7B-Instruct Q4_K_M (math-focused, 8k target)
104
+ CMD ["llama-server", "-hf", "QuantFactory/Qwen2.5-Math-7B-Instruct-GGUF:q4_k_m", "--host", "0.0.0.0", "--port", "7860", "-c", "8192", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "128", "--ubatch-size", "64"]
105
  #
106
  # Example Qwen2.5-Coder 7B Instruct (32k context):
107
  # CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",