|
|
import gradio as gr |
|
|
from langchain_community.vectorstores import Chroma |
|
|
from dotenv import load_dotenv |
|
|
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint |
|
|
from langchain_community.llms import HuggingFaceHub |
|
|
from langchain.chains import ConversationalRetrievalChain |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain.memory import ConversationBufferMemory |
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
import os |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
hf_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN") |
|
|
if hf_api_token is None: |
|
|
raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env file") |
|
|
|
|
|
|
|
|
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") |
|
|
|
|
|
|
|
|
llm = HuggingFaceEndpoint( |
|
|
repo_id="meta-llama/Llama-2-7b-hf", |
|
|
temperature=0.7, |
|
|
max_length=512, |
|
|
huggingfacehub_api_token=hf_api_token |
|
|
) |
|
|
|
|
|
|
|
|
vectorstore = Chroma(embedding_function=embedding_model, persist_directory="chroma_db") |
|
|
|
|
|
|
|
|
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True) |
|
|
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever=vectorstore.as_retriever(), memory=memory) |
|
|
|
|
|
def upload_docs(docs):
    """Load uploaded PDF files, split them into chunks, and index them.

    Args:
        docs: Uploaded file object(s) from the Gradio File component; each
            exposes a ``.name`` attribute holding a local file path.

    Returns:
        A human-readable status message for the UI.
    """
    # Guard: nothing uploaded (user clicked the button with no files).
    if not docs:
        return "No PDF documents were uploaded."

    # Gradio may deliver a single file object rather than a list when only
    # one file is selected — normalize to a list so the loop below works.
    if not isinstance(docs, (list, tuple)):
        docs = [docs]

    # Parse every uploaded PDF into LangChain Document objects.
    loaded_docs = []
    for doc in docs:
        loader = PyPDFLoader(doc.name)
        loaded_docs.extend(loader.load())

    # Split into overlapping chunks so retrieval returns focused passages.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(loaded_docs)

    # Index the chunks and flush the Chroma store to disk.
    vectorstore.add_documents(texts)
    vectorstore.persist()

    return "PDF documents uploaded and processed successfully!"
|
|
|
|
|
def chat(query):
    """Answer a user question via the conversational retrieval chain.

    Args:
        query: The user's question as a string.

    Returns:
        The generated answer string.
    """
    # BUG FIX: ConversationalRetrievalChain expects its input under the key
    # "question" and returns the answer under "answer". The original
    # "query"/"result" keys belong to RetrievalQA and raise KeyError here.
    response = qa_chain({"question": query})
    return response["answer"]
|
|
|
|
|
|
|
|
# --- Gradio UI: two columns — PDF upload on the left, chat on the right ---
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # BUG FIX: gr.File has no `multiple` parameter — multi-file
            # upload is enabled with file_count="multiple".
            doc_upload = gr.File(
                label="Upload your PDF documents",
                file_types=[".pdf"],
                file_count="multiple",
            )
            upload_button = gr.Button("Upload")
            # Named status box so the upload result is visible in context
            # (previously an anonymous Textbox created inside .click()).
            upload_status = gr.Textbox(label="Status")
            upload_button.click(upload_docs, inputs=doc_upload, outputs=upload_status)
        with gr.Column():
            chat_input = gr.Textbox(label="Ask a question:")
            chat_output = gr.Textbox(label="Answer:")
            chat_button = gr.Button("Send")
            chat_button.click(chat, inputs=chat_input, outputs=chat_output)


# Guard the launch so importing this module (e.g. for tests) does not
# start a web server; running the file as a script behaves as before.
if __name__ == "__main__":
    demo.launch()