Sumkh committed on
Commit
d5c8720
·
verified ·
1 Parent(s): b29e1e3

Upload 2 files

Browse files
Files changed (2) hide show
  1. Dockerfile +2 -3
  2. app.py +1 -1
Dockerfile CHANGED
@@ -31,12 +31,11 @@ COPY . .
31
  EXPOSE 7860
32
 
33
  # Start vLLM in the background and then the Gradio app
34
- CMD bash -c "wget -O /tmp/tool_chat_template_llama3.1_json.jinja https://github.com/vllm-project/vllm/raw/refs/heads/main/examples/tool_chat_template_llama3.1_json.jinja && \
35
- vllm.entrypoints.openai.api_server \
36
  --model unsloth/llama-3-8b-Instruct-bnb-4bit \
37
  --enable-auto-tool-choice \
38
  --tool-call-parser llama3_json \
39
- --chat-template /tmp/tool_chat_template_llama3.1_json.jinja \
40
  --quantization bitsandbytes \
41
  --load-format bitsandbytes \
42
  --dtype half \
 
31
  EXPOSE 7860
32
 
33
  # Start vLLM in the background and then the Gradio app
34
+ CMD bash -c "vllm.entrypoints.openai.api_server \
 
35
  --model unsloth/llama-3-8b-Instruct-bnb-4bit \
36
  --enable-auto-tool-choice \
37
  --tool-call-parser llama3_json \
38
+ --chat-template examples/tool_chat_template_llama3.1_json.jinja \
39
  --quantization bitsandbytes \
40
  --load-format bitsandbytes \
41
  --dtype half \
app.py CHANGED
@@ -31,7 +31,7 @@ from docling.chunking import HybridChunker
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
- from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore
 
31
  from langchain_community.document_loaders import WebBaseLoader
32
  from urllib.parse import urlparse
33
 
34
+ #from langchain_groq import ChatGroq
35
  from langchain_openai import ChatOpenAI
36
  from langgraph.prebuilt import InjectedStore
37
  from langgraph.store.base import BaseStore