Spaces:
Runtime error
Runtime error
sanjeevl10 committed on
Commit ·
d154644
1
Parent(s): cb19fde
load the airbnb llm
Browse files- app.py +10 -9
- data/airbnb-10k.pdf +0 -0
app.py
CHANGED
|
@@ -6,12 +6,10 @@ from langchain_huggingface import HuggingFaceEndpoint
|
|
| 6 |
from langchain_community.document_loaders import TextLoader
|
| 7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 8 |
from langchain_community.vectorstores import FAISS
|
|
|
|
| 9 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
| 10 |
from langchain_core.prompts import PromptTemplate
|
| 11 |
-
from langchain.schema.output_parser import StrOutputParser
|
| 12 |
-
from langchain.schema.runnable import RunnablePassthrough
|
| 13 |
from langchain.schema.runnable.config import RunnableConfig
|
| 14 |
-
from pathlib import Path
|
| 15 |
|
| 16 |
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
|
| 17 |
# ---- ENV VARIABLES ---- #
|
|
@@ -38,10 +36,13 @@ HF_TOKEN = os.environ["HF_TOKEN"]
|
|
| 38 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
| 39 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
| 40 |
"""
|
| 41 |
-
#
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
| 45 |
|
| 46 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
| 47 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
@@ -131,10 +132,10 @@ def rename(original_author: str):
|
|
| 131 |
"""
|
| 132 |
This function can be used to rename the 'author' of a message.
|
| 133 |
|
| 134 |
-
In this case, we're overriding the 'Assistant' author to be '
|
| 135 |
"""
|
| 136 |
rename_dict = {
|
| 137 |
-
"Assistant" : "
|
| 138 |
}
|
| 139 |
return rename_dict.get(original_author, original_author)
|
| 140 |
|
|
|
|
| 6 |
from langchain_community.document_loaders import TextLoader
|
| 7 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 8 |
from langchain_community.vectorstores import FAISS
|
| 9 |
+
from langchain.document_loaders import PyMuPDFLoader
|
| 10 |
from langchain_huggingface import HuggingFaceEndpointEmbeddings
|
| 11 |
from langchain_core.prompts import PromptTemplate
|
|
|
|
|
|
|
| 12 |
from langchain.schema.runnable.config import RunnableConfig
|
|
|
|
| 13 |
|
| 14 |
# GLOBAL SCOPE - ENTIRE APPLICATION HAS ACCESS TO VALUES SET IN THIS SCOPE #
|
| 15 |
# ---- ENV VARIABLES ---- #
|
|
|
|
| 36 |
3. Load HuggingFace Embeddings (remember to use the URL we set above)
|
| 37 |
4. Index Files if they do not exist, otherwise load the vectorstore
|
| 38 |
"""
|
| 39 |
+
# Load the Airbnb 10-K PDF document from the data folder
|
| 40 |
+
def load_pdfdocuments(self=None, path: str = "data/airbnb-10k.pdf"):
    """Load a PDF with PyMuPDFLoader and return the loaded documents.

    This is a module-level function that the file calls with no arguments,
    so both parameters are optional. With the previous required ``self`` and
    ``path`` parameters that bare call raised ``TypeError``.

    Args:
        self: Optional object, kept only for backward compatibility. When
            one is supplied, its ``documents`` attribute is reset to an
            empty list, exactly as the original implementation did.
        path: Location of the PDF to load. Defaults to the Airbnb 10-K file
            that was previously hard-coded (the argument used to be ignored).

    Returns:
        The result of ``PyMuPDFLoader(path).load()``.
    """
    # Preserve the old side effect for any caller that does pass an object.
    if self is not None:
        self.documents = []
    return PyMuPDFLoader(path).load()
|
| 43 |
+
|
| 44 |
+
# Load the PDF documents from airbnb-10k
|
| 45 |
+
documents = load_pdfdocuments()
|
| 46 |
|
| 47 |
### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
|
| 48 |
text_splitter = RecursiveCharacterTextSplitter(
|
|
|
|
| 132 |
"""
|
| 133 |
This function can be used to rename the 'author' of a message.
|
| 134 |
|
| 135 |
+
In this case, we're overriding the 'Assistant' author to be 'AirBnb LLM Assistant'.
|
| 136 |
"""
|
| 137 |
rename_dict = {
|
| 138 |
+
"Assistant" : "AirBnB LLM Assitant"
|
| 139 |
}
|
| 140 |
return rename_dict.get(original_author, original_author)
|
| 141 |
|
data/airbnb-10k.pdf
ADDED
|
Binary file (596 kB). View file
|
|
|