Spaces:

OttoYu
/

LLM-RAG

Runtime error

OttoYu committed on Jul 27, 2024

Commit

b1082de

verified ·

1 Parent(s): bc80689

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,11 +9,16 @@ from langchain_core.prompts import PromptTemplate
 from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
 from langchain.text_splitter import SentenceTransformersTokenTextSplitter
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 MODEL_NAME = "TheBloke/Llama-2-13B-chat-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
 text_pipeline = pipeline(
     "text-generation",
@@ -21,7 +26,6 @@ text_pipeline = pipeline(
     tokenizer=tokenizer
 )
-# Define the prompt template
 template = """
 <s>[INST] <<SYS>>
 Use the following information to answer the question at the end.

 from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
 from langchain.text_splitter import SentenceTransformersTokenTextSplitter
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+from transformers import AutoConfig
 MODEL_NAME = "TheBloke/Llama-2-13B-chat-GPTQ"
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+config = AutoConfig.from_pretrained(MODEL_NAME)
+config.quantization_config.disable_exllama = True
+model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, device_map="cpu")
 text_pipeline = pipeline(
     "text-generation",
     tokenizer=tokenizer
 )
 template = """
 <s>[INST] <<SYS>>
 Use the following information to answer the question at the end.