Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- Dockerfile +2 -3
- app.py +1 -1
Dockerfile
CHANGED
|
@@ -31,12 +31,11 @@ COPY . .
|
|
| 31 |
EXPOSE 7860
|
| 32 |
|
| 33 |
# Start vLLM in the background and then the Gradio app
|
| 34 |
-
CMD bash -c "
|
| 35 |
-
vllm.entrypoints.openai.api_server \
|
| 36 |
--model unsloth/llama-3-8b-Instruct-bnb-4bit \
|
| 37 |
--enable-auto-tool-choice \
|
| 38 |
--tool-call-parser llama3_json \
|
| 39 |
-
--chat-template /
|
| 40 |
--quantization bitsandbytes \
|
| 41 |
--load-format bitsandbytes \
|
| 42 |
--dtype half \
|
|
|
|
| 31 |
EXPOSE 7860
|
| 32 |
|
| 33 |
# Start vLLM in the background and then the Gradio app
|
| 34 |
+
CMD bash -c "vllm.entrypoints.openai.api_server \
|
|
|
|
| 35 |
--model unsloth/llama-3-8b-Instruct-bnb-4bit \
|
| 36 |
--enable-auto-tool-choice \
|
| 37 |
--tool-call-parser llama3_json \
|
| 38 |
+
--chat-template examples/tool_chat_template_llama3.1_json.jinja \
|
| 39 |
--quantization bitsandbytes \
|
| 40 |
--load-format bitsandbytes \
|
| 41 |
--dtype half \
|
app.py
CHANGED
|
@@ -31,7 +31,7 @@ from docling.chunking import HybridChunker
|
|
| 31 |
from langchain_community.document_loaders import WebBaseLoader
|
| 32 |
from urllib.parse import urlparse
|
| 33 |
|
| 34 |
-
from langchain_groq import ChatGroq
|
| 35 |
from langchain_openai import ChatOpenAI
|
| 36 |
from langgraph.prebuilt import InjectedStore
|
| 37 |
from langgraph.store.base import BaseStore
|
|
|
|
| 31 |
from langchain_community.document_loaders import WebBaseLoader
|
| 32 |
from urllib.parse import urlparse
|
| 33 |
|
| 34 |
+
#from langchain_groq import ChatGroq
|
| 35 |
from langchain_openai import ChatOpenAI
|
| 36 |
from langgraph.prebuilt import InjectedStore
|
| 37 |
from langgraph.store.base import BaseStore
|