# DocQuery — app.py
# Gradio app: upload PDF documents, index them in Chroma, and chat with
# them via a LangChain ConversationalRetrievalChain backed by a Hugging
# Face hosted LLaMA 2 endpoint.
import gradio as gr
from langchain_community.vectorstores import Chroma
from dotenv import load_dotenv
from langchain_huggingface import HuggingFaceEmbeddings, HuggingFaceEndpoint
from langchain_community.llms import HuggingFaceHub
from langchain.chains import ConversationalRetrievalChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import PyPDFLoader
import os
load_dotenv()  # pull variables from a local .env file into the process environment

# The Hugging Face API token is required to call the hosted LLM endpoint.
hf_api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
if hf_api_token is None:
    raise ValueError("HUGGINGFACEHUB_API_TOKEN not found in .env file")

# Sentence-transformer model used to embed both document chunks and queries.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Remote LLaMA 2 text-generation endpoint on the Hugging Face Hub.
# NOTE(review): meta-llama/Llama-2-7b-hf is a gated repository — the token
# must have been granted access, or calls will fail at runtime; confirm.
llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Llama-2-7b-hf",
    temperature=0.7,  # Specify temperature explicitly
    max_length=512,
    huggingfacehub_api_token=hf_api_token
)

# Chroma vector store persisted to ./chroma_db; holds the document embeddings.
vectorstore = Chroma(embedding_function=embedding_model, persist_directory="chroma_db")

# Conversational chain with retrieval: answers come from the vector store,
# and prior turns are accumulated in memory under the "chat_history" key.
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
qa_chain = ConversationalRetrievalChain.from_llm(llm, retriever=vectorstore.as_retriever(), memory=memory)
def upload_docs(docs):
    """Ingest uploaded PDF files into the Chroma vector store.

    Each uploaded file is loaded page-by-page, split into overlapping
    chunks, embedded into the shared ``vectorstore``, and persisted to
    disk so subsequent queries can retrieve the content.
    """
    # Collect every page from every uploaded PDF.
    pages = []
    for uploaded in docs:
        pages.extend(PyPDFLoader(uploaded.name).load())

    # Break pages into retrieval-sized chunks; the 200-char overlap keeps
    # sentences that straddle a boundary findable from either chunk.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    # Embed the chunks into the store and flush them to disk.
    vectorstore.add_documents(chunks)
    vectorstore.persist()
    return "PDF documents uploaded and processed successfully!"
def chat(query):
    """Answer a question against the indexed documents.

    BUG FIX: ConversationalRetrievalChain expects its input under the
    "question" key and returns the reply under "answer". The original
    code used {"query": ...} and response["result"] (the RetrievalQA
    key names), which raises a KeyError / validation error at runtime.
    """
    response = qa_chain({"question": query})
    return response["answer"]
# Gradio Interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # BUG FIX: gr.File has no `multiple=True` parameter (TypeError at
            # startup); multi-file upload is enabled with file_count="multiple".
            doc_upload = gr.File(
                label="Upload your PDF documents",
                file_types=[".pdf"],
                file_count="multiple",
            )
            # BUG FIX: the status Textbox was created inline in `outputs=` and
            # never placed in the layout, so the success message was invisible.
            upload_status = gr.Textbox(label="Status")
            upload_button = gr.Button("Upload")
            upload_button.click(upload_docs, inputs=doc_upload, outputs=upload_status)
        with gr.Column():
            chat_input = gr.Textbox(label="Ask a question:")
            chat_output = gr.Textbox(label="Answer:")
            chat_button = gr.Button("Send")
            chat_button.click(chat, inputs=chat_input, outputs=chat_output)

demo.launch()