Dmitry Beresnev committed on
Commit
97d9520
·
1 Parent(s): c33410f

change llm model to mistral

Browse files
Files changed (1) hide show
  1. Dockerfile +2 -3
Dockerfile CHANGED
@@ -97,9 +97,8 @@ EXPOSE 7860
97
  # CMD ["llama-server", "-hf", "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
98
  # "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "512"]
99
  #
100
- # Example Qwen2.5 7B Instruct (8k context, safer on 10GB RAM):
101
- # Verified llama.cpp args from model card.
102
- CMD ["llama-server", "--hf-repo", "paultimothymooney/Qwen2.5-7B-Instruct-Q4_K_M-GGUF", "--hf-file", "qwen2.5-7b-instruct-q4_k_m.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "8192", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "128", "--ubatch-size", "64"]
103
  #
104
  # Example Qwen2.5-Coder 7B Instruct (32k context):
105
  # CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",
 
97
  # CMD ["llama-server", "-hf", "TheBloke/deepseek-llm-7B-chat-GGUF:deepseek-llm-7b-chat.Q4_K_M.gguf",
98
  # "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "512"]
99
  #
100
+ # Example Mixtral-8x7B-Instruct (very heavy for 10GB CPU spaces):
101
+ CMD ["llama-server", "-hf", "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF:mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf", "--host", "0.0.0.0", "--port", "7860", "-c", "4096", "-t", "4", "-ngl", "0", "--cont-batching", "-b", "64", "--ubatch-size", "32"]
 
102
  #
103
  # Example Qwen2.5-Coder 7B Instruct (32k context):
104
  # CMD ["llama-server", "-hf", "Qwen/Qwen2.5-Coder-7B-Instruct-GGUF:Qwen2.5-Coder-7B-Instruct.Q4_K_M.gguf",