File size: 3,569 Bytes
d581fc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import os
import sys
import gradio.inputs as gr_inputs
import gradio.outputs as gr_outputs
import gradio as gr
from pinecone import Pinecone, ServerlessSpec
from langchain_community.llms import Replicate
from langchain_pinecone import PineconeVectorStore
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain.chains import ConversationalRetrievalChain
import time

# Credentials for Replicate and Pinecone are read from the environment.
# The Replicate client picks REPLICATE_API_TOKEN up from os.environ by itself,
# so nothing has to be written back; we only verify both values are present
# and fail with a readable message instead of the cryptic
# "TypeError: str expected, not NoneType" that assigning None to os.environ
# would produce.
key1 = os.environ.get("REPLICATE_API_TOKEN")
key2 = os.environ.get("PINECONE_API_KEY")

_missing_keys = [
    name
    for name, value in (("REPLICATE_API_TOKEN", key1), ("PINECONE_API_KEY", key2))
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Initialize Pinecone
pc = Pinecone(api_key=key2)

# Function to process PDF and set up chatbot
def process_pdf(pdf_doc):
    """Index an uploaded PDF in Pinecone and build the retrieval chat chain.

    The PDF is loaded, split into chunks, embedded, and written to a freshly
    (re)created Pinecone index named "pdfchatbot". A
    ``ConversationalRetrievalChain`` over that index is then stored in the
    module-level global ``qa_chain``, which ``query`` uses.

    Args:
        pdf_doc: The uploaded file as delivered by the Gradio File component:
            either a filesystem path string or an object exposing the path as
            ``.name``. ``None`` means nothing was uploaded.

    Returns:
        A short status string for display in the UI.
    """
    global qa_chain

    if pdf_doc is None:
        return "No PDF uploaded"

    # Depending on the Gradio version the upload arrives as a plain path or as
    # a temp-file wrapper with a ``.name`` attribute; neither has ``save()``.
    # ``save`` is still honoured when the object happens to provide it.
    filename = getattr(pdf_doc, "name", pdf_doc)
    save = getattr(pdf_doc, "save", None)
    if callable(save):
        save(filename)

    # Load PDF and split it into chunks for embedding
    documents = PyPDFLoader(filename).load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)

    embeddings = HuggingFaceEmbeddings()

    index_name = "pdfchatbot"

    def index_names():
        """Return the names of all indexes currently known to Pinecone."""
        return [index_info["name"] for index_info in pc.list_indexes()]

    # Start from an empty index so chunks of a previously uploaded PDF cannot
    # leak into answers about the new one. Deletion is asynchronous, so poll
    # until the name is gone before re-creating it.
    if index_name in index_names():
        pc.delete_index(index_name)
        while index_name in index_names():
            time.sleep(1)

    pc.create_index(
        name=index_name,
        # Must equal the embedding vector size.
        # NOTE(review): 768 presumably matches the default
        # HuggingFaceEmbeddings model - confirm if the model is changed.
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Index creation is asynchronous as well; wait until it reports ready.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

    vectordb = PineconeVectorStore.from_documents(texts, embeddings, index_name=index_name)

    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        input={"temperature": 0.75, "max_length": 3000}
    )

    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        vectordb.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True
    )

    return "Ready"

# Function to handle user queries
def query(history, text):
    """Answer a question about the indexed PDF and extend the chat history.

    Args:
        history: Previous turns as a sequence of (question, answer) pairs,
            the same shape this function appends to. ``None`` or empty means
            no previous turns.
        text: The new user question.

    Returns:
        A tuple ``(new_history, "")``: the history with the new
        (question, answer) pair appended, and an empty string.
        NOTE(review): the empty string looks intended to clear the question
        input box - confirm against the UI wiring.

    Raises:
        NameError: If ``process_pdf`` has not run yet, because the global
            ``qa_chain`` it creates does not exist.
    """
    history = list(history or [])
    # Every entry already is one complete (question, answer) turn, so the
    # pairs are passed through as tuples. The earlier conversion combined the
    # answers of two neighbouring turns and dropped every second turn.
    langchain_history = [(question, answer) for question, answer in history]
    result = qa_chain({"question": text, "chat_history": langchain_history})
    new_history = history + [(text, result['answer'])]
    return new_history, ""

# Define the Gradio interface
# Page-level CSS: limits the width of the element with id "col-container"
# and centres it horizontally.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# Heading markup for the chat page. The wrapping <div> is closed explicitly
# so the fragment is well-formed and cannot swallow the markup that follows it.
title_html = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>
</div>
"""

# Upload form: passes the chosen PDF to process_pdf and shows its status text.
# The components come from the top-level gradio namespace because the legacy
# gradio.inputs / gradio.outputs classes do not accept the `accept=` and
# `default=` keywords. The upload filter is expressed with `file_types`, and
# the File `type` is left at the library default.
iface = gr.Interface(
    fn=process_pdf,
    inputs=gr.File(label="Load a PDF", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Status"),
    title="PDF Chatbot Interface",
    description="Upload a PDF file to start interacting with the chatbot.",
    # String form of "flagging disabled"; the boolean form is deprecated.
    allow_flagging="never",
    css=css,
)

# Add chat history and question input to the interface
# Chat page. query() takes (history, question) and returns
# (updated history, ""), so it needs two inputs and two outputs that refer to
# the same widgets. A Blocks layout expresses that directly: the chat window
# supplies and receives the history, and the question box is overwritten with
# the returned empty string after each turn. The handler is bound to submit
# (Enter) rather than running live, so the LLM is called once per question.
with gr.Blocks(css=css) as chatbot_interface:
    # elem_id matches the "#col-container" rule in `css`.
    with gr.Column(elem_id="col-container"):
        gr.HTML(title_html)
        chat_box = gr.Chatbot(label="Chat History")
        question_box = gr.Textbox(
            label="Question",
            placeholder="Type your question and hit Enter",
        )
    question_box.submit(
        query,
        inputs=[chat_box, question_box],
        outputs=[chat_box, question_box],
    )

# Launch the combined interface.
# launch() blocks the main thread while its server runs, so two consecutive
# launch() calls would never serve the second app alongside the first. Both
# pages are therefore mounted as tabs of a single app and launched once.
demo = gr.TabbedInterface(
    [iface, chatbot_interface],
    tab_names=["Load PDF", "Chat"],
    css=css,
)
demo.launch()