Spaces:
Runtime error
Runtime error
Commit Β·
5df44d2
1
Parent(s): 618daf3
Update app.py
Browse files
app.py
CHANGED
|
@@ -3,13 +3,22 @@ import fitz
|
|
| 3 |
import re
|
| 4 |
import numpy as np
|
| 5 |
import tensorflow_hub as hub
|
| 6 |
-
from transformers import AutoModelForCausalLM,
|
|
|
|
| 7 |
import gradio as gr
|
| 8 |
import os
|
| 9 |
from sklearn.neighbors import NearestNeighbors
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
def download_pdf(url, output_path):
|
| 15 |
urllib.request.urlretrieve(url, output_path)
|
|
@@ -38,7 +47,6 @@ def pdf_to_text(path, start_page=1, end_page=None):
|
|
| 38 |
|
| 39 |
def text_to_chunks(texts, word_length=150, start_page=1):
|
| 40 |
text_toks = [t.split(' ') for t in texts]
|
| 41 |
-
page_nums = []
|
| 42 |
chunks = []
|
| 43 |
|
| 44 |
for idx, words in enumerate(text_toks):
|
|
@@ -92,9 +100,15 @@ def load_recommender(path, start_page=1):
|
|
| 92 |
return 'Corpus Loaded.'
|
| 93 |
|
| 94 |
def generate_text(prompt, max_length=512):
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
return message
|
| 99 |
|
| 100 |
def generate_answer(question):
|
|
@@ -114,7 +128,7 @@ def generate_answer(question):
|
|
| 114 |
"answer should be short and concise. \n\nQuery: {question}\nAnswer: "
|
| 115 |
|
| 116 |
prompt += f"Query: {question}\nAnswer:"
|
| 117 |
-
answer = generate_text(prompt)
|
| 118 |
return answer
|
| 119 |
|
| 120 |
def question_answer(url, file, question):
|
|
@@ -144,7 +158,7 @@ def question_answer(url, file, question):
|
|
| 144 |
recommender = SemanticSearch()
|
| 145 |
|
| 146 |
title = 'PDF GPT'
|
| 147 |
-
description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Falcon. It gives hallucination free response than other tools as the embeddings are better than
|
| 148 |
|
| 149 |
with gr.Blocks() as demo:
|
| 150 |
|
|
@@ -165,4 +179,5 @@ with gr.Blocks() as demo:
|
|
| 165 |
answer = gr.Textbox(label='The answer to your question is :')
|
| 166 |
|
| 167 |
btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
|
|
|
|
| 168 |
demo.launch()
|
|
|
|
| 3 |
import re
|
| 4 |
import numpy as np
|
| 5 |
import tensorflow_hub as hub
|
| 6 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 7 |
+
import torch
|
| 8 |
import gradio as gr
|
| 9 |
import os
|
| 10 |
from sklearn.neighbors import NearestNeighbors
|
| 11 |
|
| 12 |
+
# Falcon-40B-Instruct served through the HF `transformers` text-generation
# pipeline. NOTE(review): a 40B-parameter model needs on the order of 80+ GB
# of accelerator memory even in bfloat16 — likely beyond this Space's
# hardware (the page header reports "Runtime error"); confirm before deploy.
model_name = "tiiuae/falcon-40b-instruct"

# The tokenizer is loaded separately so generate_text() can reuse its
# eos_token_id as the generation stop token.
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_gen = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,  # half-precision weights to cut memory use
    device_map="auto",  # let accelerate place weights on available devices
)
|
| 22 |
|
| 23 |
def download_pdf(url, output_path):
    """Fetch the PDF at *url* and save it to *output_path*.

    Thin wrapper over ``urllib.request.urlretrieve``; any network or HTTP
    error propagates to the caller unchanged.
    """
    # NOTE(review): relies on `urllib.request` being imported at file top
    # (lines 1-2 are outside this diff hunk) — verify.
    urllib.request.urlretrieve(url, output_path)
|
|
|
|
| 47 |
|
| 48 |
def text_to_chunks(texts, word_length=150, start_page=1):
|
| 49 |
text_toks = [t.split(' ') for t in texts]
|
|
|
|
| 50 |
chunks = []
|
| 51 |
|
| 52 |
for idx, words in enumerate(text_toks):
|
|
|
|
| 100 |
return 'Corpus Loaded.'
|
| 101 |
|
| 102 |
def generate_text(prompt, max_length=512):
    """Sample one completion for *prompt* from the Falcon pipeline.

    Returns the first generated sequence's text. Per the HF pipeline
    contract, the returned string includes the prompt itself.
    `max_length` caps the TOTAL token count (prompt + completion).
    """
    generation_options = {
        "max_length": max_length,
        "do_sample": True,           # stochastic decoding
        "top_k": 10,                 # restrict sampling to the 10 likeliest tokens
        "num_return_sequences": 1,
        "eos_token_id": tokenizer.eos_token_id,
    }
    outputs = text_gen(prompt, **generation_options)
    return outputs[0]["generated_text"]
|
| 113 |
|
| 114 |
def generate_answer(question):
|
|
|
|
| 128 |
"answer should be short and concise. \n\nQuery: {question}\nAnswer: "
|
| 129 |
|
| 130 |
prompt += f"Query: {question}\nAnswer:"
|
| 131 |
+
answer = generate_text(prompt, 512)
|
| 132 |
return answer
|
| 133 |
|
| 134 |
def question_answer(url, file, question):
|
|
|
|
| 158 |
recommender = SemanticSearch()
|
| 159 |
|
| 160 |
# UI copy shown in the Gradio app header.
title = 'PDF GPT'
# Fixed: the previous description started with a stray space and was
# ungrammatical ("It gives hallucination free response than other tools").
description = (
    "PDF GPT allows you to chat with your PDF file using Universal Sentence "
    "Encoder and Falcon. It gives fewer hallucinated responses than other "
    "tools because its embeddings are better than GPT-3's. The returned "
    "response can even cite the page number in square brackets ([]) where "
    "the information is located, adding credibility to the responses and "
    "helping to locate pertinent information quickly."
)
|
| 162 |
|
| 163 |
with gr.Blocks() as demo:
|
| 164 |
|
|
|
|
| 179 |
answer = gr.Textbox(label='The answer to your question is :')
|
| 180 |
|
| 181 |
btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
|
| 182 |
+
|
| 183 |
demo.launch()
|