Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,8 +32,25 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 32 |
|
| 33 |
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
| 34 |
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
retriever = db.as_retriever(
|
| 38 |
search_type="mmr",
|
| 39 |
search_kwargs={'k': 1}
|
|
|
|
| 32 |
|
| 33 |
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
| 34 |
model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
|
| 35 |
+
from langchain_community.document_loaders import TextLoader
|
| 36 |
+
from langchain_text_splitters import CharacterTextSplitter
|
| 37 |
+
from google.colab import drive
|
| 38 |
+
from langchain.document_loaders import PyPDFLoader, OnlinePDFLoader
|
| 39 |
+
|
| 40 |
+
# Mount Google Drive (NOTE: no mount call follows; the PDF below is loaded from a local path)
|
| 41 |
+
loader = PyPDFLoader("test-1.pdf")
|
| 42 |
+
data = loader.load()
|
| 43 |
+
# split the documents into chunks
|
| 44 |
+
text_splitter1 = CharacterTextSplitter(chunk_size=512, chunk_overlap=0,separator="\n\n")
|
| 45 |
+
texts = text_splitter1.split_documents(data)
|
| 46 |
+
db = FAISS.from_documents(texts,
|
| 47 |
+
HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L12-v2'))
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
# Connect query to FAISS index using a retriever
|
| 51 |
+
retriever = db.as_retriever(
|
| 52 |
+
search_type="mmr",
|
| 53 |
+
search_kwargs={'k': 1}
|
| 54 |
retriever = db.as_retriever(
|
| 55 |
search_type="mmr",
|
| 56 |
search_kwargs={'k': 1}
|