bhaskartripathi committed on
Commit
5df44d2
·
1 Parent(s): 618daf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -9
app.py CHANGED
@@ -3,13 +3,22 @@ import fitz
3
  import re
4
  import numpy as np
5
  import tensorflow_hub as hub
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
7
  import gradio as gr
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
- tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-40b-instruct")
12
- model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b-instruct")
 
 
 
 
 
 
 
 
13
 
14
  def download_pdf(url, output_path):
15
  urllib.request.urlretrieve(url, output_path)
@@ -38,7 +47,6 @@ def pdf_to_text(path, start_page=1, end_page=None):
38
 
39
  def text_to_chunks(texts, word_length=150, start_page=1):
40
  text_toks = [t.split(' ') for t in texts]
41
- page_nums = []
42
  chunks = []
43
 
44
  for idx, words in enumerate(text_toks):
@@ -92,9 +100,15 @@ def load_recommender(path, start_page=1):
92
  return 'Corpus Loaded.'
93
 
94
  def generate_text(prompt, max_length=512):
95
- inputs = tokenizer(prompt, return_tensors="pt")
96
- outputs = model.generate(**inputs, max_length=max_length)
97
- message = tokenizer.decode(outputs[0])
 
 
 
 
 
 
98
  return message
99
 
100
  def generate_answer(question):
@@ -114,7 +128,7 @@ def generate_answer(question):
114
  "answer should be short and concise. \n\nQuery: {question}\nAnswer: "
115
 
116
  prompt += f"Query: {question}\nAnswer:"
117
- answer = generate_text(prompt)
118
  return answer
119
 
120
  def question_answer(url, file, question):
@@ -144,7 +158,7 @@ def question_answer(url, file, question):
144
  recommender = SemanticSearch()
145
 
146
  title = 'PDF GPT'
147
- description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Falcon. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
148
 
149
  with gr.Blocks() as demo:
150
 
@@ -165,4 +179,5 @@ with gr.Blocks() as demo:
165
  answer = gr.Textbox(label='The answer to your question is :')
166
 
167
  btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
 
168
  demo.launch()
 
3
  import re
4
  import numpy as np
5
  import tensorflow_hub as hub
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
+ import torch
8
  import gradio as gr
9
  import os
10
  from sklearn.neighbors import NearestNeighbors
11
 
12
model_name = "tiiuae/falcon-40b-instruct"

# Tokenizer and generation pipeline are created once at import time and
# reused by generate_text().
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_gen = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,          # halve memory vs. float32
    trust_remote_code=True,              # Falcon ships custom modelling code on the Hub
    device_map="auto",                   # shard across available devices (40B won't fit on one GPU)
)
22
 
23
def download_pdf(url, output_path):
    """Download the PDF at *url* and save it to *output_path*.

    Streams the HTTP response to disk in chunks instead of using the
    legacy ``urllib.request.urlretrieve`` interface (which the stdlib
    docs mark as a legacy API that may be deprecated).

    Parameters
    ----------
    url : str
        Direct link to the PDF file.
    output_path : str
        Local filesystem path to write the downloaded bytes to.
    """
    import shutil  # local import: only needed for this download helper

    with urllib.request.urlopen(url) as response, open(output_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
 
47
 
48
  def text_to_chunks(texts, word_length=150, start_page=1):
49
  text_toks = [t.split(' ') for t in texts]
 
50
  chunks = []
51
 
52
  for idx, words in enumerate(text_toks):
 
100
  return 'Corpus Loaded.'
101
 
102
def generate_text(prompt, max_length=512):
    """Generate a completion for *prompt* with the Falcon pipeline.

    Parameters
    ----------
    prompt : str
        The full prompt (context chunks + query) to complete.
    max_length : int, optional
        Maximum total token length (prompt + completion) passed to the
        pipeline. Defaults to 512.

    Returns
    -------
    str
        Only the newly generated text. The ``text-generation`` pipeline
        returns the prompt concatenated with the completion in
        ``generated_text``; the prompt prefix is stripped here so callers
        (e.g. ``generate_answer``) receive just the model's answer instead
        of an echo of the entire prompt.
    """
    sequences = text_gen(
        prompt,
        max_length=max_length,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    message = sequences[0]['generated_text']
    # Bug fix: drop the echoed prompt so only the completion is returned.
    if message.startswith(prompt):
        message = message[len(prompt):]
    return message
113
 
114
  def generate_answer(question):
 
128
  "answer should be short and concise. \n\nQuery: {question}\nAnswer: "
129
 
130
  prompt += f"Query: {question}\nAnswer:"
131
+ answer = generate_text(prompt, 512)
132
  return answer
133
 
134
  def question_answer(url, file, question):
 
158
  recommender = SemanticSearch()
159
 
160
  title = 'PDF GPT'
161
+ description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Falcon. It gives hallucination free response than other tools as the embeddings are better than GPT-3. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
162
 
163
  with gr.Blocks() as demo:
164
 
 
179
  answer = gr.Textbox(label='The answer to your question is :')
180
 
181
  btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
182
+
183
  demo.launch()