OttoYu committed on
Commit
4dcfe25
·
verified ·
1 Parent(s): b1082de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -11
app.py CHANGED
@@ -8,17 +8,13 @@ from langchain_community.vectorstores import FAISS
8
  from langchain_core.prompts import PromptTemplate
9
  from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
10
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
11
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
12
- from transformers import AutoConfig
13
 
14
- MODEL_NAME = "TheBloke/Llama-2-13B-chat-GPTQ"
15
 
 
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
-
18
- config = AutoConfig.from_pretrained(MODEL_NAME)
19
- config.quantization_config.disable_exllama = True
20
-
21
- model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, config=config, device_map="cpu")
22
 
23
  text_pipeline = pipeline(
24
  "text-generation",
@@ -26,6 +22,7 @@ text_pipeline = pipeline(
26
  tokenizer=tokenizer
27
  )
28
 
 
29
  template = """
30
  <s>[INST] <<SYS>>
31
  Use the following information to answer the question at the end.
@@ -75,7 +72,7 @@ async def process_files(file_paths):
75
 
76
  embeddings = HuggingFaceEmbeddings(
77
  model_name="thenlper/gte-large",
78
- model_kwargs={"device": "cpu"}, # Use CPU instead of CUDA
79
  encode_kwargs={"normalize_embeddings": True},
80
  )
81
 
@@ -110,6 +107,9 @@ async def query_files(files, question):
110
 
111
  return generated_text
112
 
 
 
 
113
  with gr.Blocks() as interface:
114
  gr.Markdown("### Retrieval Augmented Generation (RAG) for LLM Local Trial")
115
  gr.Markdown(
@@ -121,8 +121,7 @@ with gr.Blocks() as interface:
121
 
122
  submit_button = gr.Button("Submit")
123
  output_text = gr.Textbox(label="LLM Response", lines=8)
124
- submit_button.click(lambda files, q: asyncio.run(query_files(files, q)), inputs=[files_input, question_input],
125
- outputs=output_text)
126
 
127
  if __name__ == "__main__":
128
  interface.launch()
 
8
  from langchain_core.prompts import PromptTemplate
9
  from langchain_community.document_loaders import PDFMinerLoader, CSVLoader, JSONLoader
10
  from langchain.text_splitter import SentenceTransformersTokenTextSplitter
11
+ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer, pipeline
 
12
 
13
+ MODEL_NAME = "TheBloke/Llama-2-7B-GPTQ"
14
 
15
+ # Initialize tokenizer and model
16
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
17
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="cpu")
 
 
 
 
18
 
19
  text_pipeline = pipeline(
20
  "text-generation",
 
22
  tokenizer=tokenizer
23
  )
24
 
25
+ # Define prompt template
26
  template = """
27
  <s>[INST] <<SYS>>
28
  Use the following information to answer the question at the end.
 
72
 
73
  embeddings = HuggingFaceEmbeddings(
74
  model_name="thenlper/gte-large",
75
+ model_kwargs={"device": "cpu"},
76
  encode_kwargs={"normalize_embeddings": True},
77
  )
78
 
 
107
 
108
  return generated_text
109
 
110
+ def process_and_query(files, question):
111
+ return asyncio.run(query_files(files, question))
112
+
113
  with gr.Blocks() as interface:
114
  gr.Markdown("### Retrieval Augmented Generation (RAG) for LLM Local Trial")
115
  gr.Markdown(
 
121
 
122
  submit_button = gr.Button("Submit")
123
  output_text = gr.Textbox(label="LLM Response", lines=8)
124
+ submit_button.click(process_and_query, inputs=[files_input, question_input], outputs=output_text)
 
125
 
126
  if __name__ == "__main__":
127
  interface.launch()