Bofandra committed on
Commit
7f8ff79
·
verified ·
1 Parent(s): 9f83f75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -12
app.py CHANGED
@@ -5,6 +5,7 @@ from langchain.vectorstores import FAISS
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.document_loaders import PyPDFLoader
 
8
  import tempfile
9
 
10
  # Initialize global variables
@@ -15,29 +16,26 @@ retrieval_chain = None
15
  def process_pdf(file):
16
  global vectorstore, retrieval_chain
17
 
18
- # `file` is a gradio.NamedString → use file.name to get the path
19
- tmp_path = file.name
20
-
21
- # Load PDF
22
  loader = PyPDFLoader(tmp_path)
23
  documents = loader.load()
24
 
25
- # Split into chunks
26
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
27
  docs = text_splitter.split_documents(documents)
28
 
29
- # Create embeddings and FAISS vectorstore
30
- embeddings = HuggingFaceEmbeddings()
31
  vectorstore = FAISS.from_documents(docs, embeddings)
32
 
33
- # Setup retrieval chain
34
- retriever = vectorstore.as_retriever()
35
- retriever.search_kwargs["k"] = 4
36
 
37
- client = InferenceClient(model="deepseek-ai/DeepSeek-R1-0528")
 
 
 
 
38
 
39
  retrieval_chain = ConversationalRetrievalChain.from_llm(
40
- llm=client,
41
  retriever=retriever,
42
  return_source_documents=True
43
  )
 
5
  from langchain.embeddings import HuggingFaceEmbeddings
6
  from langchain.text_splitter import RecursiveCharacterTextSplitter
7
  from langchain.document_loaders import PyPDFLoader
8
+ from langchain_community.llms import HuggingFaceHub
9
  import tempfile
10
 
11
  # Initialize global variables
 
16
  def process_pdf(file):
17
  global vectorstore, retrieval_chain
18
 
19
+ tmp_path = file.name
 
 
 
20
  loader = PyPDFLoader(tmp_path)
21
  documents = loader.load()
22
 
 
23
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
24
  docs = text_splitter.split_documents(documents)
25
 
26
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
 
27
  vectorstore = FAISS.from_documents(docs, embeddings)
28
 
29
+ retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
 
 
30
 
31
+ # Wrap DeepSeek model properly
32
+ llm = HuggingFaceHub(
33
+ repo_id="deepseek-ai/DeepSeek-R1-0528",
34
+ model_kwargs={"temperature": 0.7, "max_new_tokens": 512}
35
+ )
36
 
37
  retrieval_chain = ConversationalRetrievalChain.from_llm(
38
+ llm=llm,
39
  retriever=retriever,
40
  return_source_documents=True
41
  )