Bofandra committed on
Commit
5934f56
·
verified ·
1 Parent(s): 01a89b1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -61
app.py CHANGED
@@ -1,96 +1,75 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
3
  from langchain.chains import ConversationalRetrievalChain
4
- from langchain.vectorstores import FAISS
5
- from langchain.embeddings import HuggingFaceEmbeddings
6
- from langchain.text_splitter import RecursiveCharacterTextSplitter
7
- from langchain.document_loaders import PyPDFLoader
8
- from langchain_community.llms import HuggingFaceHub
9
- import tempfile
10
-
11
- # Initialize global variables
12
- vectorstore = None
13
  retrieval_chain = None
 
14
 
15
 
16
- def process_pdf(file):
17
- global vectorstore, retrieval_chain
18
 
19
- tmp_path = file.name
20
- loader = PyPDFLoader(tmp_path)
21
  documents = loader.load()
22
 
23
- text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
24
- docs = text_splitter.split_documents(documents)
25
-
26
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
27
- vectorstore = FAISS.from_documents(docs, embeddings)
28
-
29
- retriever = vectorstore.as_retriever(search_kwargs={"k": 4})
30
 
31
- # Wrap DeepSeek model properly
32
- llm = HuggingFaceHub(
33
  repo_id="deepseek-ai/DeepSeek-R1-0528",
34
- model_kwargs={"temperature": 0.7, "max_new_tokens": 512}
 
35
  )
36
 
37
  retrieval_chain = ConversationalRetrievalChain.from_llm(
38
  llm=llm,
39
- retriever=retriever,
40
- return_source_documents=True
41
  )
42
 
43
  return "PDF processed. You can now ask questions!"
44
 
45
 
46
- def respond(
47
- message,
48
- history: list[dict[str, str]],
49
- system_message,
50
- max_tokens,
51
- temperature,
52
- top_p,
53
- hf_token: gr.OAuthToken,
54
- ):
55
- global retrieval_chain
56
 
57
  if retrieval_chain is None:
58
  return "Please upload a PDF first."
59
 
60
- # Reformat history for LangChain
61
- chat_history = [(h["content"], h.get("response", "")) for h in history if h["role"] == "user"]
62
-
63
- result = retrieval_chain({"question": message, "chat_history": chat_history})
64
-
65
- return result["answer"]
66
 
 
 
 
67
 
68
- chatbot = gr.ChatInterface(
69
- respond,
70
- type="messages",
71
- additional_inputs=[
72
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
73
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
74
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
75
- gr.Slider(
76
- minimum=0.1,
77
- maximum=1.0,
78
- value=0.95,
79
- step=0.05,
80
- label="Top-p (nucleus sampling)",
81
- ),
82
- ],
83
- )
84
 
85
  with gr.Blocks() as demo:
86
  with gr.Sidebar():
87
- gr.LoginButton()
88
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
89
- status = gr.Textbox(label="Status", interactive=False)
90
- pdf_upload.upload(process_pdf, inputs=pdf_upload, outputs=status)
91
 
92
- chatbot.render()
 
 
 
93
 
 
 
 
 
 
94
 
95
  if __name__ == "__main__":
96
- demo.launch()
 
1
import gradio as gr
from huggingface_hub import InferenceClient

from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEndpoint
 
 
 
 
 
 
 
10
  retrieval_chain = None
11
+ chat_history = []
12
 
13
 
14
+ def process_pdf(file, hf_token):
15
+ global retrieval_chain
16
 
17
+ # Load and split PDF
18
+ loader = PyPDFLoader(file.name)
19
  documents = loader.load()
20
 
21
+ # Embed with sentence transformers
 
 
22
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
23
+ vectorstore = FAISS.from_documents(documents, embeddings)
 
 
24
 
25
+ # Use HuggingFaceEndpoint instead of HuggingFaceHub
26
+ llm = HuggingFaceEndpoint(
27
  repo_id="deepseek-ai/DeepSeek-R1-0528",
28
+ huggingfacehub_api_token=hf_token,
29
+ task="text-generation",
30
  )
31
 
32
  retrieval_chain = ConversationalRetrievalChain.from_llm(
33
  llm=llm,
34
+ retriever=vectorstore.as_retriever()
 
35
  )
36
 
37
  return "PDF processed. You can now ask questions!"
38
 
39
 
40
+ def respond(message, history: list[dict[str, str]], hf_token: gr.OAuthToken):
41
+ global retrieval_chain, chat_history
 
 
 
 
 
 
 
 
42
 
43
  if retrieval_chain is None:
44
  return "Please upload a PDF first."
45
 
46
+ # Use invoke() instead of deprecated __call__
47
+ result = retrieval_chain.invoke({
48
+ "question": message,
49
+ "chat_history": chat_history
50
+ })
 
51
 
52
+ answer = result["answer"]
53
+ chat_history.append((message, answer))
54
+ return answer
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  with gr.Blocks() as demo:
58
  with gr.Sidebar():
59
+ hf_login = gr.LoginButton()
60
  pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
61
+ status_box = gr.Textbox(label="Status", interactive=False)
 
62
 
63
+ chatbot = gr.ChatInterface(
64
+ respond,
65
+ type="messages"
66
+ )
67
 
68
+ pdf_upload.upload(
69
+ fn=lambda file, token: process_pdf(file, token.token),
70
+ inputs=[pdf_upload, hf_login],
71
+ outputs=[status_box]
72
+ )
73
 
74
  if __name__ == "__main__":
75
+ demo.launch()