Bofandra committed on
Commit
e481fe4
·
verified ·
1 Parent(s): 5934f56

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -37
app.py CHANGED
@@ -1,75 +1,89 @@
 
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
-
4
  from langchain_community.vectorstores import FAISS
5
- from langchain_community.document_loaders import PyPDFLoader
6
  from langchain_community.embeddings import HuggingFaceEmbeddings
7
- from langchain.chains import ConversationalRetrievalChain
8
  from langchain_huggingface import HuggingFaceEndpoint
 
9
 
 
10
  retrieval_chain = None
11
  chat_history = []
12
 
 
 
 
 
13
 
14
- def process_pdf(file, hf_token):
 
15
  global retrieval_chain
16
 
 
 
 
 
17
  # Load and split PDF
18
  loader = PyPDFLoader(file.name)
19
  documents = loader.load()
20
 
21
- # Embed with sentence transformers
22
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
23
  vectorstore = FAISS.from_documents(documents, embeddings)
24
 
25
- # Use HuggingFaceEndpoint instead of HuggingFaceHub
 
 
26
  llm = HuggingFaceEndpoint(
27
- repo_id="deepseek-ai/DeepSeek-R1-0528",
28
  huggingfacehub_api_token=hf_token,
29
- task="text-generation",
30
- )
31
-
32
- retrieval_chain = ConversationalRetrievalChain.from_llm(
33
- llm=llm,
34
- retriever=vectorstore.as_retriever()
35
  )
 
36
 
37
- return "PDF processed. You can now ask questions!"
38
 
39
 
40
- def respond(message, history: list[dict[str, str]], hf_token: gr.OAuthToken):
 
41
  global retrieval_chain, chat_history
42
 
43
- if retrieval_chain is None:
44
- return "Please upload a PDF first."
 
45
 
46
- # Use invoke() instead of deprecated __call__
47
- result = retrieval_chain.invoke({
48
- "question": message,
49
- "chat_history": chat_history
50
- })
51
 
 
 
52
  answer = result["answer"]
 
53
  chat_history.append((message, answer))
54
- return answer
55
 
56
 
 
57
  with gr.Blocks() as demo:
58
- with gr.Sidebar():
59
- hf_login = gr.LoginButton()
60
- pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"])
61
- status_box = gr.Textbox(label="Status", interactive=False)
62
-
63
- chatbot = gr.ChatInterface(
64
- respond,
65
- type="messages"
 
 
 
 
66
  )
67
 
68
- pdf_upload.upload(
69
- fn=lambda file, token: process_pdf(file, token.token),
70
- inputs=[pdf_upload, hf_login],
71
- outputs=[status_box]
72
  )
73
 
 
74
  if __name__ == "__main__":
75
- demo.launch()
 
1
+ import os
2
  import gradio as gr
 
 
3
  from langchain_community.vectorstores import FAISS
 
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from langchain_community.document_loaders import PyPDFLoader
6
  from langchain_huggingface import HuggingFaceEndpoint
7
+ from langchain.chains import ConversationalRetrievalChain
8
 
9
+ # Global retrieval chain + history
10
  retrieval_chain = None
11
  chat_history = []
12
 
13
+ # Utility to fetch token (prefer user > fallback to env)
14
+ def get_hf_token(user_token: str | None = None) -> str | None:
15
+ return user_token.strip() if user_token and user_token.strip() else os.getenv("HUGGINGFACEHUB_API_TOKEN")
16
+
17
 
18
+ # Step 1: Process PDF
19
def process_pdf(file, token):
    """Index an uploaded PDF and (re)build the global retrieval chain.

    Args:
        file: Value of the Gradio ``File`` component. Either a path string or
            an object whose ``name`` attribute is the path; ``None`` when the
            user has not uploaded anything.
        token: Optional Hugging Face token typed by the user; resolved through
            ``get_hf_token`` (which may fall back to the environment).

    Returns:
        A human-readable status message describing success or the reason the
        PDF was not processed.
    """
    global retrieval_chain

    # The button can be clicked before anything is uploaded.
    if file is None:
        return "⚠️ Please upload a PDF first."

    hf_token = get_hf_token(token)
    if not hf_token:
        return "❌ No Hugging Face API token provided."

    # Accept both a plain path and a file-like object exposing ``.name``.
    pdf_path = getattr(file, "name", file)

    # Load the PDF (the loader yields one document per page; no further
    # splitting is done here) and drop pages without any extractable text.
    documents = [
        doc for doc in PyPDFLoader(pdf_path).load() if doc.page_content.strip()
    ]
    if not documents:
        # Building a FAISS index from an empty corpus fails, so stop early.
        return "❌ No text could be extracted from this PDF."

    # Embed documents
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vectorstore = FAISS.from_documents(documents, embeddings)
    retriever = vectorstore.as_retriever()

    # Build retrieval chain with DeepSeek model.
    # NOTE(review): restored the repo id used by the previous revision;
    # "deepseek-ai/deepseek-llm-R1-0528" does not appear to be a published
    # model repository - confirm against the Hub before changing it again.
    llm = HuggingFaceEndpoint(
        repo_id="deepseek-ai/DeepSeek-R1-0528",
        huggingfacehub_api_token=hf_token,
    )
    retrieval_chain = ConversationalRetrievalChain.from_llm(llm, retriever=retriever)

    # Turns recorded for a previous document would pollute question
    # condensing for the new one, so start the conversation memory afresh.
    chat_history.clear()

    return "PDF processed. You can now ask questions!"
44
 
45
 
46
+ # Step 2: Respond to user questions
47
def respond(message, history, token):
    """Answer a question about the processed PDF.

    Args:
        message: The user's question.
        history: Conversation currently shown in the chat widget (may be
            ``None`` or empty). It is never mutated.
        token: Optional Hugging Face token typed by the user; resolved through
            ``get_hf_token``.

    Returns:
        A ``(reply, updated_history)`` tuple. ``reply`` is the model's answer
        or a warning/error message. ``updated_history`` is a new list made of
        ``history`` plus the ``(message, reply)`` turn, so the second element
        means the same thing on every path.
    """
    global retrieval_chain, chat_history

    shown = list(history or [])

    def _turn(reply):
        # Build a fresh list so the caller's history object stays untouched.
        return reply, shown + [(message, reply)]

    # Do not spend an LLM call on an empty prompt.
    question = (message or "").strip()
    if not question:
        return _turn("⚠️ Please enter a question.")

    if not get_hf_token(token):
        return _turn("❌ No Hugging Face API token provided.")

    if retrieval_chain is None:
        return _turn("⚠️ Please upload and process a PDF first.")

    # Run the query; the chain's conversational context is the module-level
    # list of (question, answer) tuples, not the widget's display history.
    result = retrieval_chain.invoke(
        {"question": question, "chat_history": chat_history}
    )
    answer = result["answer"]

    chat_history.append((question, answer))
    return _turn(answer)
63
 
64
 
65
+ # Gradio UI
66
with gr.Blocks() as demo:
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
        # Mask the token so it is not readable on screen.
        token_input = gr.Textbox(label="HuggingFace Token (optional)", type="password")
        process_btn = gr.Button("Process PDF")

    # Shows the message returned by process_pdf (success or failure reason).
    status_box = gr.Textbox(label="Status", interactive=False)

    chatbot = gr.Chatbot(label="Chat with your PDF")
    msg = gr.Textbox(label="Ask a question")

    def _on_submit(message, history, token):
        """Ask ``respond`` and return (cleared input box, updated chat log)."""
        # Only the reply text is used; the turn is appended here so warnings
        # and errors are displayed the same way as answers.
        reply = respond(message, history, token)[0]
        return "", list(history or []) + [[message, reply]]

    process_btn.click(
        fn=process_pdf,
        inputs=[pdf_input, token_input],
        outputs=[status_box],
    )

    # One return value per output component: the question box is cleared and
    # the chatbot receives the full conversation.
    msg.submit(
        fn=_on_submit,
        inputs=[msg, chatbot, token_input],
        outputs=[msg, chatbot],
    )


if __name__ == "__main__":
    demo.launch()