Manglik-R committed on
Commit
d581fc9
·
verified ·
1 Parent(s): 29aba6d

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import gradio.inputs as gr_inputs
4
+ import gradio.outputs as gr_outputs
5
+ import gradio as gr
6
+ from pinecone import Pinecone, ServerlessSpec
7
+ from langchain_community.llms import Replicate
8
+ from langchain_pinecone import PineconeVectorStore
9
+ from langchain.text_splitter import CharacterTextSplitter
10
+ from langchain_community.document_loaders import PyPDFLoader
11
+ from langchain_huggingface.embeddings import HuggingFaceEmbeddings
12
+ from langchain.chains import ConversationalRetrievalChain
13
+ import time
14
+
15
# Both services are configured through environment variables. Fail fast with
# a readable message when one is absent: writing a None value back into
# os.environ would only raise an opaque TypeError.
key1 = os.environ.get('REPLICATE_API_TOKEN')
key2 = os.environ.get('PINECONE_API_KEY')

_missing_keys = [
    name
    for name, value in (
        ('REPLICATE_API_TOKEN', key1),
        ('PINECONE_API_KEY', key2),
    )
    if not value
]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_keys)
    )

# Initialize Pinecone
pc = Pinecone(api_key=key2)
22
+
23
+ # Function to process PDF and set up chatbot
24
def process_pdf(pdf_doc):
    """Index an uploaded PDF in Pinecone and build the global QA chain.

    The PDF is split into chunks, the ``pdfchatbot`` Pinecone index is
    recreated from scratch, the chunks are embedded and stored in it, and
    the module-level ``qa_chain`` is replaced with a conversational
    retrieval chain over that index.

    Args:
        pdf_doc: The upload handed over by the UI. Either a filesystem path
            (``str`` / ``os.PathLike``) or a file-like object that exposes
            the on-disk path as ``.name``. ``None`` means nothing was
            uploaded.

    Returns:
        A short status string: ``"Ready"`` on success, otherwise a message
        explaining why nothing was indexed.
    """
    global qa_chain

    if pdf_doc is None:
        return "Please upload a PDF file first."

    # The upload is already on disk; the file object has no save() method,
    # so just resolve its path.
    if isinstance(pdf_doc, (str, os.PathLike)):
        filename = os.fspath(pdf_doc)
    else:
        filename = pdf_doc.name

    # Load the PDF and split it into chunks.
    loader = PyPDFLoader(filename)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    if not texts:
        # Nothing to embed (e.g. an image-only PDF); leave any existing
        # index and chain untouched.
        return "No extractable text found in the PDF."

    embeddings = HuggingFaceEmbeddings()

    index_name = "pdfchatbot"

    def index_exists():
        """Return True while Pinecone still lists ``index_name``."""
        return any(info["name"] == index_name for info in pc.list_indexes())

    # Start from an empty index so answers only draw on the current PDF.
    # Deletion is asynchronous, so poll until the name disappears.
    if index_exists():
        pc.delete_index(index_name)
        while index_exists():
            time.sleep(1)

    # NOTE(review): dimension=768 must match the embedding model's output
    # size; presumably correct for the HuggingFaceEmbeddings default model,
    # confirm if the model is ever changed.
    pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Index creation is asynchronous as well; wait until it reports ready.
    while not pc.describe_index(index_name).status["ready"]:
        time.sleep(1)

    vectordb = PineconeVectorStore.from_documents(
        texts, embeddings, index_name=index_name
    )

    llm = Replicate(
        model="a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5",
        input={"temperature": 0.75, "max_length": 3000},
    )

    # Published as a module-level global so the question handler can use it.
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        vectordb.as_retriever(search_kwargs={'k': 2}),
        return_source_documents=True,
    )

    return "Ready"
71
+
72
+ # Function to handle user queries
73
def query(history, text):
    """Answer ``text`` against the indexed PDF and extend the chat history.

    Args:
        history: Previous turns as a sequence of ``(question, answer)``
            pairs, or ``None`` / empty when the conversation is new.
        text: The user's new question.

    Returns:
        A ``(new_history, "")`` tuple: the history with the new turn
        appended, and an empty string intended to clear the question box.
        The input ``history`` is not mutated.
    """
    turns = list(history or [])

    # The chain only exists after a PDF has been processed; answer with a
    # hint instead of failing with NameError.
    chain = globals().get("qa_chain")
    if chain is None:
        return turns + [(text, "Please upload a PDF before asking questions.")], ""

    # The chain expects (human, ai) tuples, one per completed turn.
    langchain_history = [
        (question or "", answer or "") for question, answer in turns
    ]
    result = chain.invoke({"question": text, "chat_history": langchain_history})
    return turns + [(text, result['answer'])], ""
78
+
79
# Define the Gradio interface
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

title_html = """
<div style="text-align: center;max-width: 700px;">
    <h1>Chat with PDF</h1>
    <p>Upload a PDF file to start interacting with the chatbot.</p>
</div>
"""

# One Blocks app hosts both steps (upload, then chat). Two separate
# Interface objects launched back to back never form a combined UI, and the
# chat handler needs two inputs and two outputs, which a single-textbox
# Interface cannot provide.
with gr.Blocks(css=css, title="PDF Chatbot Interface") as iface:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title_html)

        pdf_doc = gr.File(label="Load a PDF", file_types=[".pdf"])
        status = gr.Textbox(label="Status", interactive=False)

        chatbot = gr.Chatbot(label="Chat History")
        question = gr.Textbox(
            label="Question",
            placeholder="Type your question and hit Enter",
        )

    # Build the index as soon as a file is uploaded.
    pdf_doc.upload(process_pdf, inputs=pdf_doc, outputs=status)
    # Run only on Enter (not on every keystroke); the second output clears
    # the question box.
    question.submit(query, inputs=[chatbot, question], outputs=[chatbot, question])

# Kept so existing references to the former second interface still resolve.
chatbot_interface = iface

# Launch the combined interface
iface.launch()