bhaskartripathi committed on
Commit
5df44d2
·
1 Parent(s): 618daf3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -9
app.py CHANGED
@@ -3,13 +3,22 @@ import fitz
3
  import re
4
  import numpy as np
5
  import tensorflow_hub as hub
6
- from transformers import AutoModelForCausalLM, AutoTokenizer
 
7
  import gradio as gr
8
  import os
9
  from sklearn.neighbors import NearestNeighbors
10
 
11
- tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-40b-instruct")
12
- model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b-instruct")
 
 
 
 
 
 
 
 
13
 
14
  def download_pdf(url, output_path):
15
  urllib.request.urlretrieve(url, output_path)
@@ -38,7 +47,6 @@ def pdf_to_text(path, start_page=1, end_page=None):
38
 
39
  def text_to_chunks(texts, word_length=150, start_page=1):
40
  text_toks = [t.split(' ') for t in texts]
41
- page_nums = []
42
  chunks = []
43
 
44
  for idx, words in enumerate(text_toks):
@@ -92,9 +100,15 @@ def load_recommender(path, start_page=1):
92
  return 'Corpus Loaded.'
93
 
94
  def generate_text(prompt, max_length=512):
95
- inputs = tokenizer(prompt, return_tensors="pt")
96
- outputs = model.generate(**inputs, max_length=max_length)
97
- message = tokenizer.decode(outputs[0])
 
 
 
 
 
 
98
  return message
99
 
100
  def generate_answer(question):
@@ -114,7 +128,7 @@ def generate_answer(question):
114
  "answer should be short and concise. \n\nQuery: {question}\nAnswer: "
115
 
116
  prompt += f"Query: {question}\nAnswer:"
117
- answer = generate_text(prompt)
118
  return answer
119
 
120
  def question_answer(url, file, question):
@@ -144,7 +158,7 @@ def question_answer(url, file, question):
144
  recommender = SemanticSearch()
145
 
146
  title = 'PDF GPT'
147
- description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Falcon. It gives hallucination free response than other tools as the embeddings are better than OpenAI. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
148
 
149
  with gr.Blocks() as demo:
150
 
@@ -165,4 +179,5 @@ with gr.Blocks() as demo:
165
  answer = gr.Textbox(label='The answer to your question is :')
166
 
167
  btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
 
168
  demo.launch()
 
3
  import re
4
  import numpy as np
5
  import tensorflow_hub as hub
6
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
7
+ import torch
8
  import gradio as gr
9
  import os
10
  from sklearn.neighbors import NearestNeighbors
11
 
12
model_name = "tiiuae/falcon-40b-instruct"

# Tokenizer and generation pipeline are created once at import time and
# reused by generate_text().
tokenizer = AutoTokenizer.from_pretrained(model_name)
text_gen = pipeline(
    "text-generation",
    model=model_name,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,          # halve memory vs. float32
    trust_remote_code=True,              # Falcon ships custom modelling code on the Hub
    device_map="auto",                   # shard across available devices (40B won't fit on one GPU)
)
22
 
23
def download_pdf(url, output_path):
    """Download the PDF at *url* and save it to *output_path*.

    Streams the HTTP response to disk in chunks instead of using the
    legacy ``urllib.request.urlretrieve`` interface (which the stdlib
    docs mark as a legacy API that may be deprecated).

    Parameters
    ----------
    url : str
        Direct link to the PDF file.
    output_path : str
        Local filesystem path to write the downloaded bytes to.
    """
    import shutil  # local import: only needed for this download helper

    with urllib.request.urlopen(url) as response, open(output_path, 'wb') as out_file:
        shutil.copyfileobj(response, out_file)
 
47
 
48
  def text_to_chunks(texts, word_length=150, start_page=1):
49
  text_toks = [t.split(' ') for t in texts]
 
50
  chunks = []
51
 
52
  for idx, words in enumerate(text_toks):
 
100
  return 'Corpus Loaded.'
101
 
102
def generate_text(prompt, max_length=512):
    """Generate a completion for *prompt* with the Falcon pipeline.

    Parameters
    ----------
    prompt : str
        The full prompt (context chunks + query) to complete.
    max_length : int, optional
        Maximum total token length (prompt + completion) passed to the
        pipeline. Defaults to 512.

    Returns
    -------
    str
        Only the newly generated text. The ``text-generation`` pipeline
        returns the prompt concatenated with the completion in
        ``generated_text``; the prompt prefix is stripped here so callers
        (e.g. ``generate_answer``) receive just the model's answer instead
        of an echo of the entire prompt.
    """
    sequences = text_gen(
        prompt,
        max_length=max_length,
        do_sample=True,
        top_k=10,
        num_return_sequences=1,
        eos_token_id=tokenizer.eos_token_id,
    )
    message = sequences[0]['generated_text']
    # Bug fix: drop the echoed prompt so only the completion is returned.
    if message.startswith(prompt):
        message = message[len(prompt):]
    return message
113
 
114
  def generate_answer(question):
 
128
  "answer should be short and concise. \n\nQuery: {question}\nAnswer: "
129
 
130
  prompt += f"Query: {question}\nAnswer:"
131
+ answer = generate_text(prompt, 512)
132
  return answer
133
 
134
  def question_answer(url, file, question):
 
158
  recommender = SemanticSearch()
159
 
160
  title = 'PDF GPT'
161
+ description = """ PDF GPT allows you to chat with your PDF file using Universal Sentence Encoder and Falcon. It gives hallucination free response than other tools as the embeddings are better than GPT-3. The returned response can even cite the page number in square brackets([]) where the information is located, adding credibility to the responses and helping to locate pertinent information quickly."""
162
 
163
  with gr.Blocks() as demo:
164
 
 
179
  answer = gr.Textbox(label='The answer to your question is :')
180
 
181
  btn.click(question_answer, inputs=[url, file, question], outputs=[answer])
182
+
183
  demo.launch()