Spaces:

Sumkh
/

AgenticRAG

Sleeping

Sumkh committed on Feb 25, 2025

Commit

3cbd702

verified ·

1 Parent(s): e489746

Update start.sh

Files changed (1) hide show

start.sh CHANGED Viewed

@@ -8,12 +8,12 @@ mkdir -p /app/.cache
 export USER_AGENT="vllm_huggingface_space"
 # Launch the vLLM server with the model tag as a positional argument
-vllm serve unsloth/llama-3-8b-Instruct-bnb-4bit \
   --enable-auto-tool-choice \
   --tool-call-parser llama3_json \
   --chat-template examples/tool_chat_template_llama3.1_json.jinja \
-  --quantization bitsandbytes \
-  --load-format bitsandbytes \
   --dtype half \
   --enforce-eager \
   --max-model-len 8192 &

 export USER_AGENT="vllm_huggingface_space"
 # Launch the vLLM server with the model tag as a positional argument
+vllm serve MaziyarPanahi/Meta-Llama-3-70B-Instruct-GGUF \
   --enable-auto-tool-choice \
   --tool-call-parser llama3_json \
   --chat-template examples/tool_chat_template_llama3.1_json.jinja \
+  --quantization gguf \
+  --load-format gguf \
   --dtype half \
   --enforce-eager \
   --max-model-len 8192 &