Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,7 +7,7 @@ from typing import List
|
|
| 7 |
from tempfile import NamedTemporaryFile
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from langchain_community.document_loaders import PyPDFLoader
|
| 10 |
-
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 11 |
from langchain_community.vectorstores import FAISS
|
| 12 |
from langchain.docstore.document import Document
|
| 13 |
|
|
@@ -18,9 +18,14 @@ logging.basicConfig(level=logging.INFO)
|
|
| 18 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
| 19 |
DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
|
| 20 |
HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
|
|
|
|
| 21 |
|
| 22 |
def get_embeddings():
|
| 23 |
-
return HuggingFaceEmbeddings(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
def load_documents():
|
| 26 |
if os.path.exists(DOCUMENTS_FILE):
|
|
@@ -37,16 +42,17 @@ def load_document(file: NamedTemporaryFile) -> List[Document]:
|
|
| 37 |
loader = PyPDFLoader(file.name)
|
| 38 |
return loader.load_and_split()
|
| 39 |
|
| 40 |
-
def
|
| 41 |
if not files:
|
| 42 |
return "Please upload at least one file.", []
|
| 43 |
|
|
|
|
| 44 |
embed = get_embeddings()
|
| 45 |
uploaded_documents = load_documents()
|
| 46 |
total_chunks = 0
|
| 47 |
|
| 48 |
all_data = []
|
| 49 |
-
for file in
|
| 50 |
try:
|
| 51 |
data = load_document(file)
|
| 52 |
if not data:
|
|
@@ -73,7 +79,7 @@ def update_vectors(files):
|
|
| 73 |
database.save_local("faiss_database")
|
| 74 |
|
| 75 |
save_documents(uploaded_documents)
|
| 76 |
-
return f"Vector store updated successfully. Processed {total_chunks} chunks.", uploaded_documents
|
| 77 |
|
| 78 |
except Exception as e:
|
| 79 |
return f"Error updating vector store: {str(e)}", []
|
|
@@ -102,7 +108,8 @@ def delete_documents(selected_docs):
|
|
| 102 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
|
| 103 |
save_documents(uploaded_documents)
|
| 104 |
|
| 105 |
-
|
|
|
|
| 106 |
|
| 107 |
return "No documents to delete.", []
|
| 108 |
|
|
@@ -164,7 +171,7 @@ def create_interface():
|
|
| 164 |
files = gr.File(
|
| 165 |
label="Upload PDF Documents",
|
| 166 |
file_types=[".pdf"],
|
| 167 |
-
multiple
|
| 168 |
)
|
| 169 |
upload_button = gr.Button("Upload and Process")
|
| 170 |
|
|
@@ -179,22 +186,29 @@ def create_interface():
|
|
| 179 |
|
| 180 |
with gr.Row():
|
| 181 |
with gr.Column():
|
| 182 |
-
question = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
| 183 |
temperature = gr.Slider(
|
| 184 |
minimum=0.0,
|
| 185 |
maximum=1.0,
|
| 186 |
value=0.2,
|
| 187 |
step=0.1,
|
| 188 |
-
label="Temperature"
|
| 189 |
)
|
| 190 |
submit_button = gr.Button("Submit Question")
|
| 191 |
|
| 192 |
with gr.Column():
|
| 193 |
-
answer = gr.Textbox(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 194 |
|
| 195 |
# Event handlers
|
| 196 |
upload_button.click(
|
| 197 |
-
fn=
|
| 198 |
inputs=[files],
|
| 199 |
outputs=[doc_status, doc_list]
|
| 200 |
)
|
|
@@ -210,9 +224,20 @@ def create_interface():
|
|
| 210 |
inputs=[question, temperature],
|
| 211 |
outputs=[answer]
|
| 212 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 213 |
|
| 214 |
return app
|
| 215 |
|
| 216 |
if __name__ == "__main__":
|
| 217 |
app = create_interface()
|
| 218 |
-
app.launch(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
from tempfile import NamedTemporaryFile
|
| 8 |
from huggingface_hub import InferenceClient
|
| 9 |
from langchain_community.document_loaders import PyPDFLoader
|
| 10 |
+
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
|
| 11 |
from langchain_community.vectorstores import FAISS
|
| 12 |
from langchain.docstore.document import Document
|
| 13 |
|
|
|
|
| 18 |
DOCUMENTS_FILE = "uploaded_documents.json"
|
| 19 |
DEFAULT_MODEL = "@cf/meta/llama-2-7b-chat"
|
| 20 |
HF_TOKEN = os.getenv("HF_API_TOKEN") # Make sure to set this environment variable
|
| 21 |
+
EMBED_MODEL = "sentence-transformers/all-mpnet-base-v2"
|
| 22 |
|
| 23 |
def get_embeddings():
    """Create the embedding model used to vectorize document chunks.

    Returns:
        A ``HuggingFaceEmbeddings`` instance built from ``EMBED_MODEL``,
        with the model placed on the CPU and embedding normalization
        switched on.
    """
    embedding_options = {
        'model_name': EMBED_MODEL,
        # Passed through to the underlying model constructor.
        'model_kwargs': {'device': 'cpu'},
        # Passed through to the encode call.
        'encode_kwargs': {'normalize_embeddings': True},
    }
    return HuggingFaceEmbeddings(**embedding_options)
|
| 29 |
|
| 30 |
def load_documents():
|
| 31 |
if os.path.exists(DOCUMENTS_FILE):
|
|
|
|
| 42 |
loader = PyPDFLoader(file.name)
|
| 43 |
return loader.load_and_split()
|
| 44 |
|
| 45 |
+
def process_uploaded_files(files):
|
| 46 |
if not files:
|
| 47 |
return "Please upload at least one file.", []
|
| 48 |
|
| 49 |
+
files_list = [files] if not isinstance(files, list) else files
|
| 50 |
embed = get_embeddings()
|
| 51 |
uploaded_documents = load_documents()
|
| 52 |
total_chunks = 0
|
| 53 |
|
| 54 |
all_data = []
|
| 55 |
+
for file in files_list:
|
| 56 |
try:
|
| 57 |
data = load_document(file)
|
| 58 |
if not data:
|
|
|
|
| 79 |
database.save_local("faiss_database")
|
| 80 |
|
| 81 |
save_documents(uploaded_documents)
|
| 82 |
+
return f"Vector store updated successfully. Processed {total_chunks} chunks.", [doc["name"] for doc in uploaded_documents]
|
| 83 |
|
| 84 |
except Exception as e:
|
| 85 |
return f"Error updating vector store: {str(e)}", []
|
|
|
|
| 108 |
uploaded_documents = [doc for doc in uploaded_documents if doc["name"] not in selected_docs]
|
| 109 |
save_documents(uploaded_documents)
|
| 110 |
|
| 111 |
+
remaining_docs = [doc["name"] for doc in uploaded_documents]
|
| 112 |
+
return f"Deleted documents: {', '.join(selected_docs)}", remaining_docs
|
| 113 |
|
| 114 |
return "No documents to delete.", []
|
| 115 |
|
|
|
|
| 171 |
files = gr.File(
|
| 172 |
label="Upload PDF Documents",
|
| 173 |
file_types=[".pdf"],
|
| 174 |
+
file_count="multiple"
|
| 175 |
)
|
| 176 |
upload_button = gr.Button("Upload and Process")
|
| 177 |
|
|
|
|
| 186 |
|
| 187 |
with gr.Row():
|
| 188 |
with gr.Column():
|
| 189 |
+
question = gr.Textbox(
|
| 190 |
+
label="Ask a question about the documents",
|
| 191 |
+
placeholder="Enter your question here..."
|
| 192 |
+
)
|
| 193 |
temperature = gr.Slider(
|
| 194 |
minimum=0.0,
|
| 195 |
maximum=1.0,
|
| 196 |
value=0.2,
|
| 197 |
step=0.1,
|
| 198 |
+
label="Temperature (Higher values make the output more random)"
|
| 199 |
)
|
| 200 |
submit_button = gr.Button("Submit Question")
|
| 201 |
|
| 202 |
with gr.Column():
|
| 203 |
+
answer = gr.Textbox(
|
| 204 |
+
label="Answer",
|
| 205 |
+
interactive=False,
|
| 206 |
+
lines=10
|
| 207 |
+
)
|
| 208 |
|
| 209 |
# Event handlers
|
| 210 |
upload_button.click(
|
| 211 |
+
fn=process_uploaded_files,
|
| 212 |
inputs=[files],
|
| 213 |
outputs=[doc_status, doc_list]
|
| 214 |
)
|
|
|
|
| 224 |
inputs=[question, temperature],
|
| 225 |
outputs=[answer]
|
| 226 |
)
|
| 227 |
+
|
| 228 |
+
# Add keyboard shortcut for submitting questions
|
| 229 |
+
question.submit(
|
| 230 |
+
fn=get_response,
|
| 231 |
+
inputs=[question, temperature],
|
| 232 |
+
outputs=[answer]
|
| 233 |
+
)
|
| 234 |
|
| 235 |
return app
|
| 236 |
|
| 237 |
if __name__ == "__main__":
    # Script entry point: build the interface, then serve it.
    app = create_interface()
    launch_options = {
        "server_name": "0.0.0.0",  # Makes the app accessible from other machines
        "server_port": 7860,  # Specify port
        # Creates a public URL.
        # NOTE(review): this appears to be deployed as a Hugging Face Space,
        # where a share link may be unnecessary or ignored -- confirm.
        "share": True,
    }
    app.launch(**launch_options)
|