OttoYu committed on
Commit
b1082de
·
verified ·
1 Parent(s): bc80689

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -9,11 +9,16 @@ from langchain_core.prompts import PromptTemplate
9
  from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
10
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
11
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 
12
 
13
  MODEL_NAME = "TheBloke/Llama-2-13B-chat-GPTQ"
14
 
15
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
16
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
 
 
 
 
17
 
18
  text_pipeline = pipeline(
19
  "text-generation",
@@ -21,7 +26,6 @@ text_pipeline = pipeline(
21
  tokenizer=tokenizer
22
  )
23
 
24
- # Define the prompt template
25
  template = """
26
  <s>[INST] <<SYS>>
27
  Use the following information to answer the question at the end.
 
9
  from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
10
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
11
  from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
12
+ from transformers import AutoConfig
13
 
14
  MODEL_NAME = "TheBloke/Llama-2-13B-chat-GPTQ"
15
 
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
+
18
+ config = AutoConfig.from_pretrained(MODEL_NAME)
19
+ config.quantization_config.disable_exllama = True
20
+
21
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, device_map="cpu")
22
 
23
  text_pipeline = pipeline(
24
  "text-generation",
 
26
  tokenizer=tokenizer
27
  )
28
 
 
29
  template = """
30
  <s>[INST] <<SYS>>
31
  Use the following information to answer the question at the end.