dnzblgn committed on
Commit
05fe184
·
verified ·
1 Parent(s): 3ee7245

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_community.embeddings import HuggingFaceEmbeddings
4
+ from langchain_community.llms import HuggingFaceEndpoint
5
+ from langchain_community.vectorstores import FAISS
6
+ from langchain_community.document_loaders import PyPDFLoader
7
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
8
+ from langchain.chains import ConversationalRetrievalChain
9
+ from langchain.memory import ConversationBufferMemory
10
+
11
# Sentence-transformers model used to embed both the PDF chunks and the
# questions asked against them.
_EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Hosted inference endpoint for the Mistral-7B-Instruct model.
_LLM_ENDPOINT_URL = (
    "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
)

# Shared embedding model, created once at import time.
embeddings = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)

# Shared text-generation LLM; the API token is read from the HF_TOKEN
# environment variable (None is passed through if it is not set).
llm = HuggingFaceEndpoint(
    endpoint_url=_LLM_ENDPOINT_URL,
    huggingfacehub_api_token=os.getenv("HF_TOKEN"),
    task="text-generation",
)
20
+
21
def process_pdf(pdf_file):
    """Build a FAISS vector store from the text of a PDF file.

    Args:
        pdf_file: Path of the PDF to load (passed directly to PyPDFLoader).

    Returns:
        A FAISS vector store containing the embedded chunks of the document.

    Raises:
        ValueError: If the PDF yields no text chunks (for example an
            image-only scan), since there would be nothing to index.
    """
    documents = PyPDFLoader(pdf_file).load()

    # Split into 1000-character chunks with a 200-character overlap so that
    # text cut at a chunk boundary still appears whole in a neighbouring chunk.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_documents(documents)

    # An empty chunk list cannot be turned into an index; fail here with a
    # clear message instead of an obscure error from inside index creation.
    if not chunks:
        raise ValueError("No extractable text was found in the PDF.")

    return FAISS.from_documents(chunks, embeddings)
38
+
39
def setup_rag_chain(vectorstore):
    """Create a conversational retrieval chain backed by ``vectorstore``.

    Args:
        vectorstore: Vector store exposing ``as_retriever``; the three most
            similar chunks are retrieved for each question.

    Returns:
        A ConversationalRetrievalChain that uses the module-level ``llm``,
        keeps its own chat history, and returns source documents alongside
        the answer.
    """
    # The chain returns both "answer" and the source documents, so the memory
    # is told explicitly which output key to record in the history.
    conversation_memory = ConversationBufferMemory(
        memory_key="chat_history",
        return_messages=True,
        output_key="answer",
    )
    top_k_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=top_k_retriever,
        memory=conversation_memory,
        return_source_documents=True,
        chain_type="stuff",
        verbose=True,
    )
56
+
57
def get_response(query, chain):
    """Run ``query`` through ``chain`` and return the answer text.

    Args:
        query: The user's question.
        chain: A callable that accepts ``{"question": ...}`` and returns a
            mapping containing an ``"answer"`` entry.

    Returns:
        The value stored under ``"answer"`` in the chain's output.
    """
    chain_input = {"question": query}
    return chain(chain_input)["answer"]
60
+
61
def create_demo():
    """Build the Gradio Blocks UI for chatting with an uploaded PDF.

    The UI holds one retrieval chain per session in a ``gr.State``. Uploading
    a PDF replaces that chain; submitting a question appends the question and
    the chain's answer to the chat history.

    Returns:
        The constructed ``gr.Blocks`` application (not yet launched).
    """

    def process_file(file):
        """Index the uploaded PDF and return a new retrieval chain for it."""
        if file is None:
            return None
        # The upload may arrive as an object with a ``.name`` path attribute
        # or as a plain path string; accept both.
        pdf_path = getattr(file, "name", file)
        return setup_rag_chain(process_pdf(pdf_path))

    def respond(message, history, chain_state):
        """Return the cleared textbox value and the updated chat history."""
        # The Clear button sets the chatbot value to None, so normalise it.
        history = list(history or [])

        # Ignore blank submissions rather than querying the LLM with nothing.
        if not message or not message.strip():
            return "", history

        if chain_state is None:
            reply = "Please upload a PDF first."
        else:
            reply = get_response(message, chain_state)

        # NOTE(review): assumes the Chatbot uses [user, bot] pair history
        # (what a bare gr.Chatbot() used when this was written) - confirm
        # before switching the component to the "messages" format.
        history.append([message, reply])
        return "", history

    with gr.Blocks() as demo:
        chain_state = gr.State(None)

        with gr.Row():
            file_input = gr.File(label="Upload PDF", file_types=[".pdf"])

        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Question")
        clear = gr.Button("Clear")

        # The uploaded file must be listed as an input, otherwise the handler
        # is invoked with no arguments.
        file_input.upload(
            fn=process_file,
            inputs=[file_input],
            outputs=[chain_state],
        )
        # The Chatbot output needs the whole history, not just the reply; the
        # textbox is cleared at the same time.
        msg.submit(
            fn=respond,
            inputs=[msg, chatbot, chain_state],
            outputs=[msg, chatbot],
        )
        clear.click(lambda: None, None, chatbot, queue=False)

    return demo
87
+
88
if __name__ == "__main__":
    # Build the UI and start the Gradio server only when run as a script.
    demo = create_demo()
    demo.launch()