# QA_Bot / app.py
# Sazzz02's picture
# Update app.py
# 5b5526d verified
import gradio as gr
import os
import hashlib
import pickle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
# Load Groq API key from the environment (on HF Spaces, set it as a secret).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")

# Directory to cache vectorstores — one pickle per unique PDF content hash.
CACHE_DIR = "vector_cache"
os.makedirs(CACHE_DIR, exist_ok=True)
def get_pdf_hash(pdf_path: str) -> str:
    """Return the MD5 hex digest of the file at *pdf_path*.

    The digest is used purely as a cache key (not for security), so MD5
    is acceptable here. The file is hashed in fixed-size chunks so large
    PDFs do not have to be loaded into memory all at once.
    """
    hasher = hashlib.md5()
    with open(pdf_path, "rb") as f:
        # 64 KiB chunks keep peak memory flat regardless of file size;
        # iter(..., b"") stops at EOF.
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)
    return hasher.hexdigest()
def build_vectorstore(pdf_path: str):
    """Build a FAISS vector index from the PDF at *pdf_path*.

    Pipeline: load pages -> split into overlapping text chunks ->
    embed each chunk -> index the embeddings with FAISS.
    """
    pages = PyPDFLoader(pdf_path).load()

    # Overlapping chunks preserve context that would otherwise be
    # lost at chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    docs = splitter.split_documents(pages)

    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(docs, embedder)
def get_vectorstore(pdf_path: str):
    """Return a FAISS index for *pdf_path*, using an on-disk pickle cache.

    The cache key is the MD5 of the PDF's bytes, so editing the PDF
    automatically invalidates the cached index.

    NOTE(review): the cache uses pickle, which is fine for files this
    app wrote itself, but never point CACHE_DIR at untrusted data —
    ``pickle.load`` executes arbitrary code.
    """
    pdf_hash = get_pdf_hash(pdf_path)
    cache_file = os.path.join(CACHE_DIR, f"{pdf_hash}.pkl")

    if os.path.exists(cache_file):
        try:
            with open(cache_file, "rb") as f:
                return pickle.load(f)
        except Exception:
            # Corrupted or version-incompatible cache entry: fall
            # through and rebuild instead of crashing the request.
            pass

    vectorstore = build_vectorstore(pdf_path)

    # Write atomically (temp file + rename) so a crash mid-dump cannot
    # leave a truncated cache file that would poison future loads.
    tmp_file = cache_file + ".tmp"
    with open(tmp_file, "wb") as f:
        pickle.dump(vectorstore, f)
    os.replace(tmp_file, cache_file)
    return vectorstore
def rag_bot(question: str, pdf_path: str):
    """Answer *question* from the uploaded PDF via retrieval-augmented QA."""
    # Guard clause: nothing to retrieve from without a PDF.
    if not pdf_path:
        return "⚠️ Please upload a PDF first."

    try:
        # Retrieve the 3 chunks most similar to the question.
        store = get_vectorstore(pdf_path)
        retriever = store.as_retriever(search_kwargs={"k": 3})

        llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model_name="llama-3.3-70b-versatile",  # The updated model name
        )

        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=retriever,
        )
        return chain.run(question)
    except Exception as e:
        # Surface the failure as text in the UI instead of raising.
        return f"❌ Error: {e}"
# ------------------ Gradio UI ------------------
# Layout: file upload row, question row, answer row, then a submit button
# that wires (question, pdf path) -> rag_bot -> answer textbox.
with gr.Blocks() as demo:
    gr.Markdown("## πŸ“– RAG Q&A Bot – Powered by Groq + HuggingFace Embeddings")
    with gr.Row():
        # type="filepath" makes Gradio hand rag_bot a filesystem path string.
        pdf_file = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
    with gr.Row():
        # Read-only: populated by the click handler below.
        answer = gr.Textbox(label="Answer", interactive=False)
    submit = gr.Button("Submit")
    submit.click(fn=rag_bot, inputs=[question, pdf_file], outputs=answer)

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the Hugging Face Spaces default).
    demo.launch(server_name="0.0.0.0", server_port=7860)