Spaces:
Build error
Build error
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,7 +4,7 @@ from dotenv import load_dotenv
|
|
| 4 |
import streamlit as st
|
| 5 |
from PyPDF2 import PdfReader
|
| 6 |
from langchain.text_splitter import CharacterTextSplitter
|
| 7 |
-
from
|
| 8 |
from langchain.vectorstores import FAISS
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
| 10 |
from langchain.chains import ConversationalRetrievalChain
|
|
@@ -25,7 +25,7 @@ def get_pdf_text(pdf_docs):
|
|
| 25 |
for pdf in pdf_docs:
|
| 26 |
pdf_reader = PdfReader(pdf)
|
| 27 |
for page in pdf_reader.pages:
|
| 28 |
-
text += page.extract_text()
|
| 29 |
return text
|
| 30 |
|
| 31 |
# Function to split the extracted text into chunks
|
|
@@ -41,7 +41,7 @@ def get_text_chunks(text):
|
|
| 41 |
|
| 42 |
# Function to create a FAISS vectorstore using Hugging Face embeddings
|
| 43 |
def get_vectorstore(text_chunks):
|
| 44 |
-
embeddings =
|
| 45 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 46 |
return vectorstore
|
| 47 |
|
|
|
|
| 4 |
import streamlit as st
|
| 5 |
from PyPDF2 import PdfReader
|
| 6 |
from langchain.text_splitter import CharacterTextSplitter
|
| 7 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
| 8 |
from langchain.vectorstores import FAISS
|
| 9 |
from langchain.memory import ConversationBufferMemory
|
| 10 |
from langchain.chains import ConversationalRetrievalChain
|
|
|
|
| 25 |
for pdf in pdf_docs:
|
| 26 |
pdf_reader = PdfReader(pdf)
|
| 27 |
for page in pdf_reader.pages:
|
| 28 |
+
text += page.extract_text() or ""
|
| 29 |
return text
|
| 30 |
|
| 31 |
# Function to split the extracted text into chunks
|
|
|
|
| 41 |
|
| 42 |
# Function to create a FAISS vectorstore using Hugging Face embeddings
|
| 43 |
def get_vectorstore(text_chunks):
|
| 44 |
+
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
|
| 45 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
| 46 |
return vectorstore
|
| 47 |
|