|
|
import gradio as gr |
|
|
import os |
|
|
import hashlib |
|
|
import pickle |
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter |
|
|
from langchain_community.document_loaders import PyPDFLoader |
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain.chains import RetrievalQA |
|
|
from langchain_groq import ChatGroq |
|
|
|
|
|
|
|
|
# Groq API key read from the environment; will be None if the variable is unset.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
|
|
|
|
|
|
|
|
# Directory where built FAISS indexes are cached on disk, keyed by PDF hash.
CACHE_DIR = "vector_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
|
|
|
|
|
|
|
|
def get_pdf_hash(pdf_path: str) -> str:
    """Return the MD5 hex digest of the file at *pdf_path*.

    Used purely as a cache key (not for security), so MD5 is acceptable.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        32-character lowercase hex digest of the file contents.
    """
    hasher = hashlib.md5()
    with open(pdf_path, "rb") as f:
        # Hash in fixed-size chunks so large PDFs are never loaded into
        # memory all at once (the original read the whole file into RAM).
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
|
|
|
|
|
|
|
|
def build_vectorstore(pdf_path: str):
    """Build a FAISS vector index over the contents of a PDF.

    Loads the PDF page by page, splits the pages into overlapping text
    chunks, embeds each chunk with a sentence-transformers model, and
    returns the resulting FAISS store.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        A FAISS vectorstore built from the chunked document.
    """
    pages = PyPDFLoader(pdf_path).load()

    # Overlapping chunks preserve context that would otherwise be lost at
    # chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    docs = splitter.split_documents(pages)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
|
|
|
|
|
|
|
|
def get_vectorstore(pdf_path: str):
    """Return a FAISS vectorstore for *pdf_path*, using an on-disk cache.

    The cache key is the MD5 hash of the PDF's bytes, so re-uploading the
    same document skips the expensive embedding step.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        A FAISS vectorstore for the document (cached or freshly built).
    """
    pdf_hash = get_pdf_hash(pdf_path)
    cache_file = os.path.join(CACHE_DIR, f"{pdf_hash}.pkl")

    if os.path.exists(cache_file):
        try:
            # NOTE(security): pickle.load can execute arbitrary code if the
            # cache file is tampered with; acceptable only because CACHE_DIR
            # is written exclusively by this process.
            with open(cache_file, "rb") as f:
                return pickle.load(f)
        except Exception:
            # Corrupt or version-incompatible cache (e.g. after a library
            # upgrade) — discard it and rebuild below instead of crashing.
            try:
                os.remove(cache_file)
            except OSError:
                pass

    vectorstore = build_vectorstore(pdf_path)
    # Write to a temp file and rename so a crash mid-dump never leaves a
    # truncated cache file behind (os.replace is atomic on POSIX/Windows).
    tmp_file = f"{cache_file}.tmp"
    with open(tmp_file, "wb") as f:
        pickle.dump(vectorstore, f)
    os.replace(tmp_file, cache_file)
    return vectorstore
|
|
|
|
|
|
|
|
def rag_bot(question: str, pdf_path: str):
    """Answer *question* using retrieval-augmented generation over a PDF.

    Args:
        question: The user's question.
        pdf_path: Filesystem path of the uploaded PDF (may be None/empty
            if nothing has been uploaded yet).

    Returns:
        The model's answer as a string, or a human-readable warning/error
        message (never raises — errors are returned as text for the UI).
    """
    # Guard clauses: fail fast with a friendly message instead of an
    # opaque exception from deep inside the chain.
    if not pdf_path:
        return "β οΈ Please upload a PDF first."
    if not question or not question.strip():
        return "β οΈ Please enter a question."
    if not GROQ_API_KEY:
        return "β οΈ GROQ_API_KEY environment variable is not set."

    try:
        vectorstore = get_vectorstore(pdf_path)
        # Retrieve the 3 most similar chunks as context for the LLM.
        retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

        llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model_name="llama-3.3-70b-versatile",
        )

        # "stuff" chain: concatenate all retrieved chunks into one prompt.
        qa = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )
        result = qa.run(question)
        return result
    except Exception as e:
        # Surface any failure (network, auth, parsing) as text in the UI.
        return f"β Error: {e}"
|
|
|
|
|
|
|
|
|
|
|
# UI layout: one column of rows — PDF upload, question box, read-only answer
# box — plus a submit button wired to the RAG pipeline.
with gr.Blocks() as demo:
    gr.Markdown("## π RAG Q&A Bot β Powered by Groq + HuggingFace Embeddings")

    with gr.Row():
        uploaded_pdf = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
    with gr.Row():
        user_question = gr.Textbox(label="Ask a Question")
    with gr.Row():
        answer_box = gr.Textbox(label="Answer", interactive=False)

    ask_button = gr.Button("Submit")
    # Clicking Submit runs rag_bot(question, pdf_path) and shows the result.
    ask_button.click(fn=rag_bot, inputs=[user_question, uploaded_pdf], outputs=answer_box)
|
|
|
|
|
if __name__ == "__main__":
    # Bind to all interfaces on port 7860 so the app is reachable from
    # outside a container or remote host, not just localhost.
    demo.launch(server_name="0.0.0.0", server_port=7860)