Amjad committed on
Commit
3648580
·
1 Parent(s): f2ee95a

Add PDF RAG system

Browse files
Files changed (2) hide show
  1. app.py +17 -8
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,11 +1,16 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
3
 
4
- """
5
- For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
- """
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -14,7 +19,13 @@ def respond(
14
  max_tokens,
15
  temperature,
16
  top_p,
 
17
  ):
 
 
 
 
 
18
  messages = [{"role": "system", "content": system_message}]
19
 
20
  for val in history:
@@ -39,9 +50,7 @@ def respond(
39
  response += token
40
  yield response
41
 
42
- """
43
- For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
44
- """
45
  demo = gr.ChatInterface(
46
  respond,
47
  additional_inputs=[
@@ -55,9 +64,9 @@ demo = gr.ChatInterface(
55
  step=0.05,
56
  label="Top-p (nucleus sampling)",
57
  ),
 
58
  ],
59
  )
60
 
61
-
62
  if __name__ == "__main__":
63
- demo.launch()
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ import fitz # PyMuPDF
4
 
5
+ # For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 
 
6
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
7
 
8
def extract_text_from_pdf(pdf_path):
    """Return the concatenated plain text of every page in the PDF at *pdf_path*.

    Args:
        pdf_path: Filesystem path to a PDF file readable by PyMuPDF.

    Returns:
        A single string with each page's extracted text appended in page order.
    """
    # Use a context manager so the document handle is always closed —
    # the original left the fitz.Document open (file-handle leak).
    with fitz.open(pdf_path) as doc:
        # join() avoids repeated string reallocation of `text += ...`
        return "".join(page.get_text() for page in doc)
14
 
15
  def respond(
16
  message,
 
19
  max_tokens,
20
  temperature,
21
  top_p,
22
+ pdf_file
23
  ):
24
+ # Extract text from the uploaded PDF
25
+ if pdf_file:
26
+ pdf_text = extract_text_from_pdf(pdf_file.name)
27
+ system_message += f"\n\nContext from PDF:\n{pdf_text}"
28
+
29
  messages = [{"role": "system", "content": system_message}]
30
 
31
  for val in history:
 
50
  response += token
51
  yield response
52
 
53
+ # For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
 
54
  demo = gr.ChatInterface(
55
  respond,
56
  additional_inputs=[
 
64
  step=0.05,
65
  label="Top-p (nucleus sampling)",
66
  ),
67
+ gr.File(label="Upload PDF")
68
  ],
69
  )
70
 
 
71
  if __name__ == "__main__":
72
+ demo.launch()
requirements.txt CHANGED
@@ -1 +1,4 @@
1
- huggingface_hub==0.22.2
 
 
 
 
1
+ transformers
2
+ gradio
3
+ huggingface_hub
4
+ pymupdf