Update app.py
Browse files
app.py
CHANGED
|
@@ -78,7 +78,6 @@ documents = loader.load()
|
|
| 78 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
| 79 |
texts = text_splitter.split_documents(documents)
|
| 80 |
|
| 81 |
-
@spaces.GPU(duration=120)
|
| 82 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
| 83 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 84 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|
|
@@ -87,7 +86,7 @@ if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
|
| 87 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 88 |
vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embeddings, allow_dangerous_deserialization=True)
|
| 89 |
|
| 90 |
-
|
| 91 |
def build_model(model_repo = CFG.model_name):
|
| 92 |
tokenizer = AutoTokenizer.from_pretrained(model_repo)
|
| 93 |
model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2")
|
|
@@ -168,7 +167,7 @@ qa_chain = RetrievalQA.from_chain_type(
|
|
| 168 |
verbose = False
|
| 169 |
)
|
| 170 |
|
| 171 |
-
@spaces.GPU
|
| 172 |
def wrap_text_preserve_newlines(text, width=1500):
|
| 173 |
# Split the input text into lines based on newline characters
|
| 174 |
lines = text.split('\n')
|
|
@@ -181,7 +180,7 @@ def wrap_text_preserve_newlines(text, width=1500):
|
|
| 181 |
|
| 182 |
return wrapped_text
|
| 183 |
|
| 184 |
-
@spaces.GPU
|
| 185 |
def process_llm_response(llm_response):
|
| 186 |
ans = wrap_text_preserve_newlines(llm_response['result'])
|
| 187 |
|
|
@@ -204,7 +203,7 @@ def process_llm_response(llm_response):
|
|
| 204 |
|
| 205 |
return ans.strip()
|
| 206 |
|
| 207 |
-
@spaces.GPU
|
| 208 |
def llm_ans(query):
|
| 209 |
|
| 210 |
llm_response = qa_chain.invoke(query)
|
|
|
|
| 78 |
text_splitter = RecursiveCharacterTextSplitter(chunk_size = CFG.split_chunk_size, chunk_overlap = CFG.split_overlap)
|
| 79 |
texts = text_splitter.split_documents(documents)
|
| 80 |
|
|
|
|
| 81 |
if not os.path.exists(CFG.Embeddings_path + '/index.faiss'):
|
| 82 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 83 |
vectordb = FAISS.from_documents(documents=texts, embedding=embeddings)
|
|
|
|
| 86 |
embeddings = HuggingFaceInstructEmbeddings(model_name = CFG.embeddings_model_repo, model_kwargs={"device":"cuda"})
|
| 87 |
vectordb = FAISS.load_local(CFG.Output_folder + '/faiss_index_ml_papers', embeddings, allow_dangerous_deserialization=True)
|
| 88 |
|
| 89 |
+
@spaces.GPU
|
| 90 |
def build_model(model_repo = CFG.model_name):
|
| 91 |
tokenizer = AutoTokenizer.from_pretrained(model_repo)
|
| 92 |
model = AutoModelForCausalLM.from_pretrained(model_repo, attn_implementation="flash_attention_2")
|
|
|
|
| 167 |
verbose = False
|
| 168 |
)
|
| 169 |
|
| 170 |
+
@spaces.GPU
|
| 171 |
def wrap_text_preserve_newlines(text, width=1500):
|
| 172 |
# Split the input text into lines based on newline characters
|
| 173 |
lines = text.split('\n')
|
|
|
|
| 180 |
|
| 181 |
return wrapped_text
|
| 182 |
|
| 183 |
+
@spaces.GPU
|
| 184 |
def process_llm_response(llm_response):
|
| 185 |
ans = wrap_text_preserve_newlines(llm_response['result'])
|
| 186 |
|
|
|
|
| 203 |
|
| 204 |
return ans.strip()
|
| 205 |
|
| 206 |
+
@spaces.GPU
|
| 207 |
def llm_ans(query):
|
| 208 |
|
| 209 |
llm_response = qa_chain.invoke(query)
|