Update app.py
Browse files
app.py
CHANGED
|
@@ -5,6 +5,7 @@ from langchain.vectorstores import FAISS
|
|
| 5 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
from langchain.document_loaders import PyPDFLoader
|
|
|
|
| 8 |
import tempfile
|
| 9 |
|
| 10 |
# Initialize global variables
|
|
@@ -15,29 +16,26 @@ retrieval_chain = None
|
|
| 15 |
def process_pdf(file):
    """Index an uploaded PDF and build the conversational retrieval chain.

    Loads the PDF found at ``file.name``, splits it into overlapping
    chunks, embeds the chunks into a FAISS vectorstore, and wires a
    ``ConversationalRetrievalChain`` around a HuggingFaceHub-hosted LLM.
    Results are stored in the module-level ``vectorstore`` and
    ``retrieval_chain`` globals rather than returned.

    Args:
        file: an uploaded-file object exposing its on-disk path via
            ``.name`` (e.g. a Gradio temp-file wrapper — TODO confirm).
    """
    global vectorstore, retrieval_chain

    # The upload wrapper carries the temp path in .name.
    tmp_path = file.name

    # Load the PDF into per-page documents.
    loader = PyPDFLoader(tmp_path)
    documents = loader.load()

    # Overlapping chunks so answers keep surrounding context.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    docs = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings()
    vectorstore = FAISS.from_documents(docs, embeddings)

    # Retrieve the 4 most similar chunks per query.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # FIX: the original left `llm=` dangling (no model supplied), which is a
    # syntax error. Provide an actual LLM wrapped via HuggingFaceHub.
    llm = HuggingFaceHub(
        repo_id="deepseek-ai/DeepSeek-R1-0528",
        model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
    )

    retrieval_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )
|
|
|
|
| 5 |
from langchain.embeddings import HuggingFaceEmbeddings
|
| 6 |
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
| 7 |
from langchain.document_loaders import PyPDFLoader
|
| 8 |
+
from langchain_community.llms import HuggingFaceHub
|
| 9 |
import tempfile
|
| 10 |
|
| 11 |
# Initialize global variables
|
|
|
|
| 16 |
def process_pdf(file):
    """Build the global FAISS index and retrieval chain from a PDF upload.

    Side effects: rebinds the module-level ``vectorstore`` and
    ``retrieval_chain`` globals; nothing is returned.
    """
    global vectorstore, retrieval_chain

    # The uploaded-file wrapper exposes its on-disk location via .name.
    pdf_path = file.name
    pages = PyPDFLoader(pdf_path).load()

    # Chunk the pages with overlap so context straddles chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(chunks, embedder)

    # Top-4 nearest chunks per query.
    retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

    # Wrap the DeepSeek model behind the HuggingFaceHub LLM interface.
    llm = HuggingFaceHub(
        repo_id="deepseek-ai/DeepSeek-R1-0528",
        model_kwargs={"temperature": 0.7, "max_new_tokens": 512},
    )

    retrieval_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
    )
|