lei0lei committed on
Commit
82ad063
·
1 Parent(s): c9e149f
Files changed (4) hide show
  1. app.py +21 -20
  2. chain.py +131 -0
  3. interface.py +47 -0
  4. requirements.txt +3 -1
app.py CHANGED
@@ -2,26 +2,27 @@
2
 
3
  # Demo code for hugging face
4
  import gradio as gr
5
- import random
 
 
6
 
7
def chat(message, history):
    """Toy chat handler: reply is chosen by the question's prefix.

    Args:
        message (str): The user's message.
        history (list | None): Prior (message, response) pairs, or None.

    Returns:
        tuple: The updated history twice (chatbot display + state component).
    """
    log = history or []
    if message.startswith("How many"):
        answer = random.randint(1, 10)
    elif message.startswith("How"):
        answer = random.choice(["Great", "Good", "Okay", "Bad"])
    elif message.startswith("Where"):
        answer = random.choice(["Here", "There", "Somewhere"])
    else:
        answer = "I don't know"
    log.append((message, answer))
    return log, log
19
 
 
 
 
 
20
 
21
# Wire the demo chat function into a simple text-in / chatbot-out interface.
iface = gr.Interface(
    fn=chat,
    inputs=["text", "state"],
    outputs=["chatbot", "state"],
    allow_flagging="never",
)
iface.launch()
 
 
 
 
 
 
 
# Demo code for hugging face
import gradio as gr
import os
from interface import create_demo
from chain import set_api_key, enable_api_box, add_text, generate_response, render_file

# Build the Blocks UI and unpack handles to every interactive component.
demo, api_key, change_api_key, chatbot, show_img, txt, submit_btn, btn = create_demo()

# Set up event handlers
with demo:
    # Event handler for submitting the OpenAI API key
    api_key.submit(set_api_key, inputs=[api_key], outputs=[api_key])

    # Event handler for changing the API key
    change_api_key.click(enable_api_box, outputs=[api_key])

    # Event handler for uploading a PDF
    btn.upload(render_file, inputs=[btn], outputs=[show_img])

    # Event handler for submitting text and generating response
    # Pipeline: append the question to the chat, then generate the answer,
    # then re-render the PDF preview on the page the answer was sourced from.
    submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
        success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot,txt]).\
        success(render_file, inputs=[btn], outputs=[show_img])

# NOTE(review): `import os` appears unused in this module — confirm before removing.
if __name__ == "__main__":
    demo.launch()
chain.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
import fitz
from PIL import Image
import gradio as gr

# Global variables
count = 0          # incremented after the first PDF is processed; gates chain creation
n = 0              # page index of the latest answer's source document (set in generate_response)
chat_history = []  # (question, answer) pairs fed back into the retrieval chain
chain = ''         # placeholder; replaced by a ConversationalRetrievalChain on first query
17
+
18
# Function to set the OpenAI API key
def set_api_key(api_key):
    """Store the user's OpenAI API key in the process environment.

    Args:
        api_key (str): The OpenAI API key entered in the textbox.

    Returns:
        str: Confirmation message shown back in the textbox.
    """
    os.environ.update(OPENAI_API_KEY=api_key)
    return 'OpenAI API key is set'
31
+
32
# Function to enable the API key input box
def enable_api_box():
    """Handler for the 'Update API Key' button; produces no explicit value.

    Returns:
        None
    """
    # NOTE(review): returning None resets the wired api_key textbox's value;
    # whether it also restores interactivity depends on the Gradio version — confirm.
    return None
41
+
42
# Function to add text to the chat history
def add_text(history, text):
    """Append the user's input to the chat history as an editable entry.

    Args:
        history (list): Chat history; each entry pairs user text with bot text.
        text (str): The user's input text.

    Returns:
        list: The same history list with [text, ''] appended.

    Raises:
        gr.Error: If the input text is empty.
    """
    if not text:
        raise gr.Error('Enter text')
    # Fix: append a mutable list (not a tuple) so downstream code can fill
    # in the answer in place via history[-1][-1] without a TypeError.
    history.append([text, ''])
    return history
58
+
59
# Function to process the PDF file and create a conversation chain
def process_file(file):
    """Build a conversational retrieval chain over the uploaded PDF.

    Args:
        file: The uploaded PDF file (object exposing a `.name` path).

    Returns:
        ConversationalRetrievalChain: Chain backed by a Chroma retriever
        over the PDF's pages (top-1 retrieval, sources returned).

    Raises:
        gr.Error: If no OpenAI API key has been set in the environment.
    """
    if 'OPENAI_API_KEY' not in os.environ:
        raise gr.Error('Upload your OpenAI API key')

    pages = PyPDFLoader(file.name).load()
    vector_store = Chroma.from_documents(pages, OpenAIEmbeddings())
    retriever = vector_store.as_retriever(search_kwargs={"k": 1})
    return ConversationalRetrievalChain.from_llm(
        ChatOpenAI(temperature=0.3),
        retriever=retriever,
        return_source_documents=True,
    )
84
+
85
# Function to generate a response based on the chat history and query
def generate_response(history, query, btn):
    """Answer the user's query against the uploaded PDF.

    Args:
        history (list): Chat history; the last entry is the (query, '') placeholder.
        query (str): The user's query.
        btn: The uploaded PDF file (UploadButton value).

    Returns:
        tuple: (updated chat history, " ") — the second value resets the
        input textbox this handler's second output is wired to.

    Raises:
        gr.Error: If no PDF has been uploaded.
    """
    global count, n, chat_history, chain

    if not btn:
        raise gr.Error(message='Upload a PDF')
    # Build the retrieval chain once, on the first query after upload.
    if count == 0:
        chain = process_file(btn)
        count += 1

    result = chain({"question": query, 'chat_history': chat_history},
                   return_only_outputs=True)
    chat_history.append((query, result["answer"]))
    # Fix: read the source page directly from Document.metadata instead of
    # the fragile list(...)[1][1]['page'] tuple-unpacking of the Document.
    n = result['source_documents'][0].metadata['page']

    # Fix: the original appended the answer char-by-char, which provided no
    # streaming (no `yield`) and raised TypeError when history[-1] was an
    # immutable tuple. Replace the placeholder entry wholesale instead.
    history[-1] = (history[-1][0], result['answer'])
    return history, " "
113
+
114
# Function to render a specific page of a PDF file as an image
def render_file(file):
    """Render page `n` (module global) of a PDF file as a PIL image.

    Args:
        file: The PDF file (object exposing a `.name` path).

    Returns:
        PIL.Image.Image: The rendered page at 300 DPI.
    """
    global n
    doc = fitz.open(file.name)
    try:
        page = doc[n]
        # 72 is PDF's native DPI, so 300/72 scales the render to 300 DPI.
        pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
        # Image.frombytes copies the pixel buffer, so the document can be
        # closed immediately afterwards.
        image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
    finally:
        # Fix: the original leaked the fitz document handle on every render.
        doc.close()
    return image
interface.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr

# Gradio application setup
def create_demo():
    """Build the PDF-chatbot UI.

    Returns:
        tuple: (demo, api_key, change_api_key, chatbot, show_img,
        text_input, submit_btn, upload_btn) — the Blocks app plus every
        component the caller needs to wire event handlers to.
    """
    with gr.Blocks(title=" PDF Chatbot",
                   # Fix: use the built-in Soft theme object; the string
                   # "Soft" triggers a (failing) Hub theme lookup in Gradio 4.
                   theme=gr.themes.Soft()
                   ) as demo:

        with gr.Column():
            with gr.Row():
                # Fix: Gradio 4 expects integer `scale` values; 4:1 keeps
                # the original 0.8 / 0.2 proportions.
                with gr.Column(scale=4):
                    api_key = gr.Textbox(
                        placeholder='Enter your OpenAI API key',
                        show_label=False,
                        interactive=True,
                        container=False)

                with gr.Column(scale=1):
                    change_api_key = gr.Button('Update API Key')

            with gr.Row():
                chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
                show_img = gr.Image(label='PDF Preview', height=680)

            with gr.Row():
                # 3:1:1 preserves the original 0.60 / 0.20 / 0.20 split.
                with gr.Column(scale=3):
                    text_input = gr.Textbox(
                        show_label=False,
                        placeholder="Ask your pdf?",
                        container=False)

                with gr.Column(scale=1):
                    submit_btn = gr.Button('Send')

                with gr.Column(scale=1):
                    upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])

    return demo, api_key, change_api_key, chatbot, show_img, text_input, submit_btn, upload_btn
43
+
44
if __name__ == '__main__':
    # Manual smoke test: build the UI and serve it locally.
    demo, *_ = create_demo()
    demo.queue()
    demo.launch()
requirements.txt CHANGED
@@ -5,4 +5,6 @@ pydantic
5
  duckduckgo-search
6
  docarray
7
  huggingface_hub
8
- gradio==4.7.1
 
 
 
5
  duckduckgo-search
6
  docarray
7
  huggingface_hub
8
+ gradio==4.7.1
9
+ pymupdf
10
+ uvicorn==0.24.0