File size: 3,600 Bytes
d581fc9
 
 
 
 
 
 
 
 
 
 
60519ed
 
 
d581fc9
 
60519ed
d581fc9
 
 
ed30da8
d581fc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15255e8
d581fc9
 
 
 
 
60519ed
d581fc9
 
 
 
 
 
 
14b50e3
60519ed
d581fc9
60519ed
d581fc9
 
60519ed
 
 
 
15255e8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import os
import gradio as gr
from pinecone import Pinecone, ServerlessSpec
from langchain_community.llms import Replicate
from langchain_pinecone import PineconeVectorStore
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
import time

# Credentials come from the environment; each value is None when its
# variable is not set.
replicate_api_token = os.environ.get('REPLICATE_API_TOKEN')
pinecone_api_key = os.environ.get('PINECONE_API_KEY')

# Pinecone client shared by the functions below.
pc = Pinecone(api_key=pinecone_api_key)

# Function to process PDF and set up chatbot
def process_pdf(pdf_doc):
    """Index an uploaded PDF in Pinecone and build the question-answering chain.

    The PDF is loaded, split into chunks, embedded and stored in a freshly
    created Pinecone index named ``pdfchatbot`` (an existing index with that
    name is deleted first). The resulting retrieval chain is published through
    the module-level ``qa_chain`` global.

    Args:
        pdf_doc: The uploaded file. Either a path (``str`` or path-like) or an
            object exposing the file path as ``.name``. ``None`` means nothing
            was uploaded.

    Returns:
        A status message suitable for display in the UI.
    """
    global qa_chain

    # Clicking "Process PDF" without uploading anything passes None here.
    if pdf_doc is None:
        return "Please upload a PDF file before processing."

    # Accept both plain paths and upload wrappers that carry the path in .name.
    if isinstance(pdf_doc, (str, os.PathLike)):
        filename = os.fspath(pdf_doc)
    else:
        filename = pdf_doc.name

    # Load the PDF and split it into chunks for embedding.
    loader = PyPDFLoader(filename)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings()

    index_name = "pdfchatbot"
    existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]

    # Start from an empty index so chunks of a previous PDF are not retrieved.
    if index_name in existing_indexes:
        pc.delete_index(index_name)
        # Poll until the index no longer shows up in the listing.
        while index_name in [index_info["name"] for index_info in pc.list_indexes()]:
            time.sleep(1)

    pc.create_index(
        name=index_name,
        # Must equal the embedding vector size; 768 is presumably the output
        # size of the default HuggingFaceEmbeddings model -- confirm if the
        # embedding model is ever changed.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Poll until the new index reports that it is ready.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

    vectordb = PineconeVectorStore.from_documents(texts, embeddings, index_name=index_name)

    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        input={"temperature": 0.75, "max_length": 3000}
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        vectordb.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True
    )

    return "PDF processed and ready for queries."

# Function to handle user queries
def query(history, text):
    """Answer a question about the processed PDF and extend the chat history.

    Args:
        history: Previous turns as ``(question, answer)`` pairs. ``None`` is
            treated as an empty history.
        text: The user's new question.

    Returns:
        A ``(new_history, "")`` tuple: the history with the new turn appended,
        and an empty string (used by the UI to clear the input box).
    """
    no_pdf_message = "Please upload and process a PDF before asking questions."
    turns = list(history or [])

    # Nothing to ask: leave the conversation untouched.
    if not text or not text.strip():
        return turns, ""

    # qa_chain only exists after process_pdf() has completed successfully.
    try:
        chain = qa_chain
    except NameError:
        return turns + [(text, no_pdf_message)], ""

    # Each stored turn is already a (question, answer) pair, which is the
    # shape passed as chat_history. Convert to real tuples (the UI may hand
    # back lists) and replace missing parts with empty strings.
    langchain_history = [(question or "", answer or "") for question, answer in turns]
    result = chain({"question": text, "chat_history": langchain_history})
    return turns + [(text, result['answer'])], ""

# Static styling and header markup for the Gradio page.
# The values are spelled out as explicit literals so that every newline and
# space (including the trailing spaces after the heading) is visible.
css = "\n#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}\n"

title = (
    "\n"
    '<div style="text-align: center; max-width: 700px;">\n'
    "    <h1>Chat with PDF</h1>   \n"
    "</div>\n"
)

def pdf_changes(pdf_doc):
    """Forward the uploaded file to process_pdf and return its status text."""
    return process_pdf(pdf_doc)

# Build the page: upload + process controls on top, chat widgets below.
with gr.Blocks(css=css) as iface:
    with gr.Column(elem_id="col-container"):
        gr.Markdown(title)
        pdf_upload = gr.File(label="Upload PDF", file_types=['.pdf'])
        process_button = gr.Button("Process PDF")
        process_status = gr.Textbox(label="Status", interactive=False)
        # Conversation so far, as a list of (question, answer) pairs.
        history = gr.State([])
        with gr.Row():
            chatbot = gr.Chatbot(label="PDF Chatbot")
        user_input = gr.Textbox(label="Your Question", placeholder="Type your question and hit Enter")
        clear_button = gr.Button("Clear History")

        def _submit_question(past_turns, question):
            """Run query() and route its history to both the state and the chatbot."""
            updated_turns, cleared_text = query(past_turns, question)
            return updated_turns, updated_turns, cleared_text

        process_button.click(pdf_changes, inputs=pdf_upload, outputs=process_status)
        # The state must be listed as an output as well; otherwise it stays
        # empty forever and each question is answered without any context.
        user_input.submit(
            _submit_question,
            [history, user_input],
            [history, chatbot, user_input],
        )
        # Reset the stored history together with the visible conversation.
        clear_button.click(lambda: ([], []), None, [history, chatbot])

iface.launch()