Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,8 +20,7 @@ import torch
|
|
| 20 |
import tqdm
|
| 21 |
import accelerate
|
| 22 |
|
| 23 |
-
|
| 24 |
-
default_persist_directory = './chromaDB/'
|
| 25 |
|
| 26 |
llm_name0 = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 27 |
|
|
@@ -52,10 +51,9 @@ def create_db(splits, collection_name):
|
|
| 52 |
vectordb = Chroma.from_documents(
|
| 53 |
documents=splits,
|
| 54 |
embedding=embedding,
|
|
|
|
| 55 |
client=new_client,
|
| 56 |
-
collection_name=collection_name
|
| 57 |
-
persist_directory=default_persist_directory
|
| 58 |
-
)
|
| 59 |
return vectordb
|
| 60 |
|
| 61 |
|
|
@@ -78,30 +76,7 @@ def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, pr
|
|
| 78 |
llm = HuggingFaceHub(
|
| 79 |
repo_id=llm_model,
|
| 80 |
model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
|
| 81 |
-
)
|
| 82 |
-
elif llm_model == "microsoft/phi-2":
|
| 83 |
-
raise gr.Error("phi-2 model requires 'trust_remote_code=True', currently not supported by langchain HuggingFaceHub...")
|
| 84 |
-
llm = HuggingFaceHub(
|
| 85 |
-
repo_id=llm_model,
|
| 86 |
-
model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
|
| 87 |
-
)
|
| 88 |
-
elif llm_model == "TinyLlama/TinyLlama-1.1B-Chat-v1.0":
|
| 89 |
-
llm = HuggingFaceHub(
|
| 90 |
-
repo_id=llm_model,
|
| 91 |
-
model_kwargs={"temperature": temperature, "max_new_tokens": 250, "top_k": top_k}
|
| 92 |
-
)
|
| 93 |
-
elif llm_model == "meta-llama/Llama-2-7b-chat-hf":
|
| 94 |
-
raise gr.Error("Llama-2-7b-chat-hf model requires a Pro subscription...")
|
| 95 |
-
llm = HuggingFaceHub(
|
| 96 |
-
repo_id=llm_model,
|
| 97 |
-
model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
|
| 98 |
-
)
|
| 99 |
-
else:
|
| 100 |
-
llm = HuggingFaceHub(
|
| 101 |
-
repo_id=llm_model,
|
| 102 |
-
# model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "trust_remote_code": True, "torch_dtype": "auto"}
|
| 103 |
-
model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k}
|
| 104 |
-
)
|
| 105 |
|
| 106 |
progress(0.75, desc="Defining buffer memory...")
|
| 107 |
memory = ConversationBufferMemory(
|
|
@@ -164,7 +139,7 @@ def format_chat_history(message, chat_history):
|
|
| 164 |
def conversation(qa_chain, message, history):
|
| 165 |
formatted_chat_history = format_chat_history(message, history)
|
| 166 |
#print("formatted_chat_history",formatted_chat_history)
|
| 167 |
-
|
| 168 |
# Generate response using QA chain
|
| 169 |
response = qa_chain({"question": message, "chat_history": formatted_chat_history})
|
| 170 |
response_answer = response["answer"]
|
|
@@ -199,12 +174,7 @@ def demo():
|
|
| 199 |
collection_name = gr.State()
|
| 200 |
|
| 201 |
gr.Markdown(
|
| 202 |
-
"""<center><h2>
|
| 203 |
-
<h3>Ask any questions about your PDF documents, along with follow-ups</h3>
|
| 204 |
-
<b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
|
| 205 |
-
When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity purposes.</i>
|
| 206 |
-
<br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
|
| 207 |
-
""")
|
| 208 |
with gr.Tab("Step 1 - Document pre-processing"):
|
| 209 |
with gr.Row():
|
| 210 |
document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
|
|
|
|
| 20 |
import tqdm
|
| 21 |
import accelerate
|
| 22 |
|
| 23 |
+
default_persist_directory = './ChromaDB'
|
|
|
|
| 24 |
|
| 25 |
llm_name0 = "mistralai/Mixtral-8x7B-Instruct-v0.1"
|
| 26 |
|
|
|
|
| 51 |
vectordb = Chroma.from_documents(
|
| 52 |
documents=splits,
|
| 53 |
embedding=embedding,
|
| 54 |
+
persist_directory="./chroma_db",
|
| 55 |
client=new_client,
|
| 56 |
+
collection_name=collection_name)
|
|
|
|
|
|
|
| 57 |
return vectordb
|
| 58 |
|
| 59 |
|
|
|
|
| 76 |
llm = HuggingFaceHub(
|
| 77 |
repo_id=llm_model,
|
| 78 |
model_kwargs={"temperature": temperature, "max_new_tokens": max_tokens, "top_k": top_k, "load_in_8bit": True}
|
| 79 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
progress(0.75, desc="Defining buffer memory...")
|
| 82 |
memory = ConversationBufferMemory(
|
|
|
|
| 139 |
def conversation(qa_chain, message, history):
|
| 140 |
formatted_chat_history = format_chat_history(message, history)
|
| 141 |
#print("formatted_chat_history",formatted_chat_history)
|
| 142 |
+
formatted_chat_history = ""
|
| 143 |
# Generate response using QA chain
|
| 144 |
response = qa_chain({"question": message, "chat_history": formatted_chat_history})
|
| 145 |
response_answer = response["answer"]
|
|
|
|
| 174 |
collection_name = gr.State()
|
| 175 |
|
| 176 |
gr.Markdown(
|
| 177 |
+
"""<center><h2>ChatPDF</center></h2>""")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 178 |
with gr.Tab("Step 1 - Document pre-processing"):
|
| 179 |
with gr.Row():
|
| 180 |
document = gr.Files(height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
|