Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,6 +24,9 @@ from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
|
|
| 24 |
from transformers import TextIteratorStreamer
|
| 25 |
from threading import Thread
|
| 26 |
|
|
|
|
|
|
|
|
|
|
| 27 |
# Prompt template
|
| 28 |
template = """Instruction:
|
| 29 |
You are an AI assistant for answering questions about the provided context.
|
|
@@ -44,7 +47,12 @@ QA_PROMPT = PromptTemplate(
|
|
| 44 |
model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
|
| 45 |
|
| 46 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
# sentence transformers to be used in vector store
|
| 50 |
embeddings = HuggingFaceEmbeddings(
|
|
|
|
| 24 |
from transformers import TextIteratorStreamer
|
| 25 |
from threading import Thread
|
| 26 |
|
| 27 |
+
#MR Added
|
| 28 |
+
from transformers import GPTQConfig
|
| 29 |
+
|
| 30 |
# Prompt template
|
| 31 |
template = """Instruction:
|
| 32 |
You are an AI assistant for answering questions about the provided context.
|
|
|
|
| 47 |
model_id = "TheBloke/dolphin-2_6-phi-2-GPTQ" #change MR
|
| 48 |
|
| 49 |
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
|
| 50 |
+
|
| 51 |
+
quantization_config_loading = GPTQConfig(bits=4, disable_exllama=True) #MR Added
|
| 52 |
+
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float32, device_map="auto",
|
| 53 |
+
trust_remote_code=True,
|
| 54 |
+
quantization_config=quantization_config_loading #MR Added
|
| 55 |
+
)
|
| 56 |
|
| 57 |
# sentence transformers to be used in vector store
|
| 58 |
embeddings = HuggingFaceEmbeddings(
|