|
|
# Prefer the bundled ``pysqlite3`` build over the interpreter's own ``sqlite3``
# by registering it under the standard module name before anything else
# imports ``sqlite3``.
# NOTE(review): presumably needed because the vector store imported below
# requires a newer SQLite than the system one -- confirm.
import sys

try:
    __import__('pysqlite3')
except ImportError:
    # The optional wheel is not installed on this machine; keep using the
    # standard-library ``sqlite3`` instead of aborting at import time.
    pass
else:
    sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
|
|
from langchain_community.llms import Ollama |
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
from pathlib import Path |
|
|
from langchain.chains import create_history_aware_retriever |
|
|
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
|
from langchain.chains import create_retrieval_chain |
|
|
from langchain.chains.combine_documents import create_stuff_documents_chain |
|
|
from langchain_community.document_loaders import WebBaseLoader |
|
|
from langchain_text_splitters import RecursiveCharacterTextSplitter |
|
|
from langchain_community.vectorstores import Chroma |
|
|
from langchain_community import embeddings |
|
|
from langchain_core.messages import AIMessage, HumanMessage |
|
|
|
|
|
# Module-wide handle on the Ollama "mistral" model; build_the_bot2 passes it
# to both the question-rewriting retriever and the answer chain.
llm = Ollama(model="mistral")
|
|
|
|
|
|
|
|
def _chat_prompt(system_text):
    """Return a chat prompt made of a system message, the history, and the user input."""
    return ChatPromptTemplate.from_messages(
        [
            ("system", system_text),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}"),
        ]
    )


def build_the_bot2(input_text):
    """Index a PDF and assemble the history-aware retrieval chain.

    The PDF is loaded with ``PyPDFLoader``, split with
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=200``), embedded with the ``nomic-embed-text`` Ollama
    model and stored in a Chroma collection persisted under ``./sfbook``.
    The resulting chain is published as the module-level ``rag_chain``; the
    intermediate objects named in the ``global`` statements are published
    as well.

    Args:
        input_text: Path of the PDF to index (string or path-like).

    Returns:
        ``'Index saved successful!!!'`` once the chain is ready, or a short
        explanatory message when no file or a non-PDF file was supplied.
    """
    global loader, vectorstore, rag_chain, qa_prompt
    global contextualize_q_system_prompt, contextualize_q_prompt
    global history_aware_retriever

    print(input_text)

    # Nothing was uploaded yet: report it instead of failing inside Path().
    if not input_text:
        return 'Please upload a PDF document before building the bot.'

    pdf_path = Path(input_text)
    # Only a PDF loader is used below, so reject other file types up front.
    if pdf_path.suffix.lower() != '.pdf':
        return f"Only PDF documents are supported, got '{pdf_path.name}'."

    # Prompt that rewrites a follow-up question into a standalone one.
    contextualize_q_system_prompt = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question "
        "which can be understood without the chat history. "
        "Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    contextualize_q_prompt = _chat_prompt(contextualize_q_system_prompt)

    # Load the document and cut it into overlapping chunks.
    loader = PyPDFLoader(file_path=pdf_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
    )
    all_splits = text_splitter.split_documents(documents)

    # Embed the chunks and store them.
    # NOTE(review): the same persist directory is reused on every build, so
    # chunks from earlier builds presumably stay in the collection -- confirm
    # whether that is intended.
    embedding = embeddings.OllamaEmbeddings(model="nomic-embed-text")
    vectorstore = Chroma.from_documents(
        documents=all_splits,
        embedding=embedding,
        persist_directory="./sfbook",
    )
    vectorstore.persist()
    retriever = vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 6},
    )

    # Prompt that answers from the retrieved chunks ({context}).
    qa_system_prompt = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know. "
        "Use three sentences maximum and keep the answer concise."
        "\n{context}"
    )
    qa_prompt = _chat_prompt(qa_system_prompt)
    question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

    # Retrieval first rewrites the question using the history, then searches.
    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_prompt
    )
    rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

    return 'Index saved successful!!!'
|
|
|
|
|
|
|
|
|
|
|
from langchain_core.messages import AIMessage, HumanMessage |
|
|
|
|
|
|
|
|
# Initial (empty) LangChain message history; the UI wraps it in gr.State and
# hands it to chat2 as its third argument.
chat_context = []
|
|
def chat2(chat_history, user_input, chat_context):
    """Answer ``user_input`` with the retrieval chain and stream the reply.

    Args:
        chat_history: ``(user, bot)`` pairs currently shown in the chatbot;
            ``None`` is treated as an empty history. It is not modified.
        user_input: The question typed by the user.
        chat_context: List of LangChain messages from earlier turns. It is
            extended in place with this turn, so the caller's list keeps the
            conversation; ``None`` is treated as an empty list.

    Yields:
        ``chat_history`` plus one ``(user_input, reply)`` pair, where the
        reply grows by one character per step. If the chain has not been
        built yet, a single pair carrying a notice is yielded instead.
    """
    chat_history = chat_history or []

    # rag_chain only exists after build_the_bot2 has run successfully.
    try:
        chain = rag_chain
    except NameError:
        yield chat_history + [(user_input, 'Please build the bot from a document first.')]
        return

    # Test for None explicitly: an empty list must be kept (not replaced by a
    # fresh one), otherwise the turns appended below are lost to the caller.
    if chat_context is None:
        chat_context = []

    ai_msg = chain.invoke({"input": user_input, "chat_history": chat_context})
    answer = ai_msg["answer"]

    # Record the reply as an AI message so it is replayed as an assistant
    # turn rather than as a bare string.
    chat_context.extend(
        [HumanMessage(content=user_input), AIMessage(content=answer)]
    )
    print(answer)

    # Emit the reply one character at a time for a typing effect.
    response = ""
    for letter in answer:
        response += letter
        yield chat_history + [(user_input, response)]
|
|
|
|
|
|
|
|
|
|
|
def upload_file(files):
    """Return the uploaded file to show in the file component.

    Args:
        files: Either a single uploaded file or a list/tuple of them.

    Returns:
        The first element when a non-empty list/tuple is given, ``None`` for
        an empty one, and the value itself otherwise (so a single path
        string is returned whole rather than as its first character).
    """
    print(files)
    if isinstance(files, (list, tuple)):
        return files[0] if files else None
    return files
|
|
|
|
|
|
|
|
import gradio as gr

# NOTE(review): this Blocks instance is not referenced anywhere below; kept
# only so the module-level name still exists -- confirm it can be dropped.
block = gr.Blocks()

# Two-tab UI: the first tab uploads a document and builds the index, the
# second tab chats against it.
with gr.Blocks() as demo:
    gr.Markdown('# Q&A Bot with Mistral Model')

    with gr.Tab("Input Text Document"):
        file_output = gr.File()
        # Only PDFs are offered: build_the_bot2 reads the file with PyPDFLoader.
        upload_button = gr.UploadButton(file_types=[".pdf"])
        upload_button.upload(upload_file, upload_button, file_output)

        text_output = gr.Textbox()
        text_button = gr.Button("Build the Bot!!!")
        text_button.click(build_the_bot2, file_output, text_output)

    with gr.Tab("Knowledge Bot"):
        chatbot = gr.Chatbot()
        message = gr.Textbox("what is this document about?")
        # chat2 receives the visible chat, the typed question and the
        # message history state; its output replaces the visible chat.
        message.submit(chat2, [chatbot, message, gr.State(chat_context)], chatbot)

# queue() is enabled before launching; chat2 is a generator handler.
demo.queue().launch(share=True, debug=True, server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|