Update app.py
Browse files
app.py
CHANGED
|
@@ -12,6 +12,8 @@ from langchain.memory import ConversationBufferMemory
|
|
| 12 |
from langchain.llms import HuggingFaceHub
|
| 13 |
from langchain.memory import ConversationBufferWindowMemory
|
| 14 |
from langchain_community.document_loaders import TextLoader
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from pathlib import Path
|
| 17 |
import chromadb
|
|
@@ -80,7 +82,7 @@ def load_doc(list_file_path, chunk_size, chunk_overlap):
|
|
| 80 |
# Processing for one document only
|
| 81 |
# loader = PyPDFLoader(file_path)
|
| 82 |
# pages = loader.load()
|
| 83 |
-
loaders = [
|
| 84 |
pages = []
|
| 85 |
for loader in loaders:
|
| 86 |
pages.extend(loader.load())
|
|
@@ -227,16 +229,16 @@ def demo():
|
|
| 227 |
vector_db = gr.State()
|
| 228 |
qa_chain = gr.State()
|
| 229 |
collection_name = gr.State()
|
| 230 |
-
pdf_directory = '/home/user/app/
|
| 231 |
|
| 232 |
def process_pdfs():
|
| 233 |
# List all PDF files in the directory
|
| 234 |
-
pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".
|
| 235 |
return pdf_files
|
| 236 |
|
| 237 |
# Create a dictionary with the necessary information
|
| 238 |
pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
|
| 239 |
-
"visible": False, "file_types": ["
|
| 240 |
"label": "Uploaded PDF documents"}
|
| 241 |
|
| 242 |
# Create a gr.Files component with the dictionary
|
|
|
|
| 12 |
from langchain.llms import HuggingFaceHub
|
| 13 |
from langchain.memory import ConversationBufferWindowMemory
|
| 14 |
from langchain_community.document_loaders import TextLoader
|
| 15 |
+
from langchain_community.document_loaders import DirectoryLoader
|
| 16 |
+
from langchain_community.document_loaders import UnstructuredHTMLLoader
|
| 17 |
|
| 18 |
from pathlib import Path
|
| 19 |
import chromadb
|
|
|
|
| 82 |
# Processing for one document only
|
| 83 |
# loader = PyPDFLoader(file_path)
|
| 84 |
# pages = loader.load()
|
| 85 |
+
loaders = [UnstructuredHTMLLoader(x) for x in list_file_path]
|
| 86 |
pages = []
|
| 87 |
for loader in loaders:
|
| 88 |
pages.extend(loader.load())
|
|
|
|
| 229 |
vector_db = gr.State()
|
| 230 |
qa_chain = gr.State()
|
| 231 |
collection_name = gr.State()
|
| 232 |
+
pdf_directory = '/home/user/app/htmls/'
|
| 233 |
|
| 234 |
def process_pdfs():
|
| 235 |
# List all HTML files in the directory
|
| 236 |
+
pdf_files = [os.path.join(pdf_directory, file) for file in os.listdir(pdf_directory) if file.endswith(".html")]
|
| 237 |
return pdf_files
|
| 238 |
|
| 239 |
# Create a dictionary with the necessary information
|
| 240 |
pdf_dict = {"value": process_pdfs, "height": 100, "file_count": "multiple",
|
| 241 |
+
"visible": False, "file_types": ["html"], "interactive": True,
|
| 242 |
"label": "Uploaded PDF documents"}
|
| 243 |
|
| 244 |
# Create a gr.Files component with the dictionary
|