netero89 committed on
Commit
0c60c20
·
verified ·
1 Parent(s): d1c1086

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +148 -0
app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Swap the stdlib sqlite3 module for pysqlite3 before anything imports it
# (Chroma needs a newer SQLite than many hosted environments ship).
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

# FIX: was `from path import Path` (third-party `path.py` package);
# stdlib pathlib.Path is the portable choice and PyPDFLoader accepts it.
from pathlib import Path

from langchain_community.llms import Ollama
from langchain_community.document_loaders import PyPDFLoader
from langchain.chains import create_history_aware_retriever
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community import embeddings
from langchain_core.messages import AIMessage, HumanMessage

# Local Ollama-served Mistral model used for both question rewriting and answering.
llm = Ollama(model="mistral")
def build_the_bot2(input_text):
    """Build the RAG pipeline for one PDF file.

    Loads the PDF at *input_text*, splits it into overlapping chunks,
    embeds the chunks into a persistent Chroma store, and wires up a
    history-aware retrieval chain. The chain and its pieces are published
    as module globals so ``chat`` can use them afterwards.

    Args:
        input_text: Path to the PDF to index (string or path-like).

    Returns:
        A short status string shown in the Gradio textbox.
    """
    # NOTE(review): removed an unused function-local `import os`.
    print(input_text)  # surface the chosen file path in the server log

    global loader, vectorstore, rag_chain, qa_prompt, contextualize_q_system_prompt, contextualize_q_prompt, history_aware_retriever

    # Prompt that rewrites a follow-up question into a standalone one,
    # so retrieval works even when the user says "what about that?".
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    # Load and chunk the document. add_start_index keeps each chunk's
    # character offset in its metadata.
    loader = PyPDFLoader(file_path=Path(input_text))
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    all_splits = text_splitter.split_documents(documents)

    # Embed with a local Ollama embedding model and persist the index to disk.
    embedding = embeddings.OllamaEmbeddings(model="nomic-embed-text")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embedding,
        persist_directory="./sfbook",
    )
    vectorstore.persist()
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

    # Prompt used to answer once the relevant chunks are retrieved.
    qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\

{context}"""
    qa_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )

    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    # Retriever that first contextualizes the question against the chat
    # history, then performs the similarity search.
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    return 'Index saved successful!!!'
global chat_context
chat_context = []


def chat(chat_history, user_input, chat_context):
    """Answer *user_input* against the chain built by ``build_the_bot2``.

    Generator used as a Gradio submit handler: yields the chat history
    extended with the answer, one character at a time, so the Chatbot
    widget animates the reply.

    Args:
        chat_history: List of (user, bot) tuples from the Chatbot widget.
        user_input:   The latest user question.
        chat_context: Accumulated LangChain message objects (may be None).

    Yields:
        chat_history plus the in-progress (user_input, partial_answer) pair.
    """
    chat_context = chat_context or []

    # BUG FIX: create_retrieval_chain expects the user turn under "input"
    # (both prompts reference "{input}"); the original passed "question".
    result = rag_chain.invoke(
        {
            "input": user_input,
            "chat_history": chat_history,
        }
    )

    # BUG FIX: the chain returns a dict; the generated text lives under
    # "answer". The original joined/stored the raw dict, which would have
    # streamed the dict *keys* instead of the reply.
    answer = result["answer"]
    chat_context.extend([HumanMessage(content=user_input), AIMessage(content=answer)])

    # Stream the reply character by character.
    partial = ""
    for ch in answer:
        partial += ch
        yield chat_history + [(user_input, partial)]
def upload_file(files):
    """Gradio UploadButton callback: log the upload and return the first file."""
    print(files)
    first, *_ = files
    return first
import gradio as gr

# UI layout; chain design based on
# https://python.langchain.com/v0.1/docs/use_cases/question_answering/chat_history/
# NOTE(review): removed an unused `block = gr.Blocks()` assignment.
with gr.Blocks() as demo:
    gr.Markdown('# Q&A Bot with Mistral Model')

    # Tab 1: upload a document and build the retrieval index from it.
    with gr.Tab("Input Text Document"):
        file_output = gr.File()
        upload_button = gr.UploadButton(file_types=[".pdf", ".csv", ".docx"])
        upload_button.upload(upload_file, upload_button, file_output)
        text_output = gr.Textbox()
        text_button = gr.Button("Build the Bot!!!")
        text_button.click(build_the_bot2, file_output, text_output)

    # Tab 2: chat against the built index.
    with gr.Tab("Knowledge Bot"):
        chatbot = gr.Chatbot()
        message = gr.Textbox("what is this document about?")
        # BUG FIX: the original wired `chat2`, a name never defined in this
        # file; the submit handler implemented above is `chat`.
        message.submit(chat, [chatbot, message, gr.State(chat_context)], chatbot)

demo.queue().launch(share=True, debug=True)