import gradio as gr
import os
import hashlib
import pickle
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq
# Load Groq API key from the environment (None if the variable is unset;
# ChatGroq will fail at call time in that case).
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# Directory to cache vectorstores (one pickle per PDF, keyed by content hash)
CACHE_DIR = "vector_cache"
os.makedirs(CACHE_DIR, exist_ok=True)  # idempotent: safe if the dir already exists
def get_pdf_hash(pdf_path: str) -> str:
    """Return the MD5 hex digest of the file at *pdf_path*.

    Used purely as a cache key (not for security), so MD5 is fine here.
    The file is hashed in fixed-size chunks so large PDFs are never
    loaded into memory in one piece.

    Args:
        pdf_path: Path to the PDF file on disk.

    Returns:
        32-character lowercase hex digest of the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(pdf_path, "rb") as f:
        # Walrus loop: read 64 KiB at a time until EOF (empty bytes).
        while chunk := f.read(65536):
            digest.update(chunk)
    return digest.hexdigest()
def build_vectorstore(pdf_path: str):
    """Build a FAISS index for one PDF.

    Loads the PDF page-by-page, splits the pages into overlapping text
    chunks, embeds each chunk with a sentence-transformers model, and
    returns the resulting FAISS vector store.

    Args:
        pdf_path: Path to the PDF file to index.

    Returns:
        A FAISS vector store over the document's chunks.
    """
    pages = PyPDFLoader(pdf_path).load()
    # Chunking strategy: ~1000-char windows with 200-char overlap,
    # preferring paragraph, then line, then word boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        separators=["\n\n", "\n", " ", ""],
    )
    pieces = splitter.split_documents(pages)
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_documents(pieces, embedder)
def get_vectorstore(pdf_path: str):
    """Return a FAISS index for *pdf_path*, using an on-disk pickle cache.

    The cache key is the MD5 of the PDF's bytes, so editing the file
    naturally invalidates its cache entry. A cache entry that fails to
    unpickle (corrupted file, library version change) is deleted and the
    index is rebuilt instead of crashing the request.

    NOTE(review): pickle is only acceptable here because cache files are
    produced locally by this app; never load cache files from untrusted
    sources.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        A FAISS vector store, loaded from cache or freshly built.
    """
    pdf_hash = get_pdf_hash(pdf_path)
    cache_file = os.path.join(CACHE_DIR, f"{pdf_hash}.pkl")
    if os.path.exists(cache_file):
        try:
            with open(cache_file, "rb") as f:
                return pickle.load(f)
        except (pickle.PickleError, EOFError, AttributeError, ImportError):
            # Stale or corrupted cache entry: discard it and rebuild below.
            os.remove(cache_file)
    # Build and cache
    vectorstore = build_vectorstore(pdf_path)
    with open(cache_file, "wb") as f:
        pickle.dump(vectorstore, f)
    return vectorstore
def rag_bot(question: str, pdf_path: str):
    """Answer *question* using retrieval-augmented QA over the uploaded PDF.

    Args:
        question: The user's natural-language question.
        pdf_path: Filesystem path of the uploaded PDF (falsy if none).

    Returns:
        The model's answer string, a warning when no PDF is uploaded,
        or an error message if anything fails along the way.
    """
    # Guard clause: nothing to retrieve from without a PDF.
    if not pdf_path:
        return "⚠️ Please upload a PDF first."
    try:
        # Use Groq LLM
        llm = ChatGroq(
            groq_api_key=GROQ_API_KEY,
            model_name="llama-3.3-70b-versatile",  # The updated model name
        )
        # Load or build vectorstore, then wire retriever + LLM into a
        # "stuff" chain (all retrieved chunks packed into one prompt).
        store = get_vectorstore(pdf_path)
        chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=store.as_retriever(search_kwargs={"k": 3}),
        )
        return chain.run(question)
    except Exception as e:
        # Boundary handler: surface the failure to the UI instead of raising.
        return f"❌ Error: {e}"
# ------------------ Gradio UI ------------------
with gr.Blocks() as demo:
    gr.Markdown("## 📖 RAG Q&A Bot – Powered by Groq + HuggingFace Embeddings")
    with gr.Row():
        pdf_file = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
    with gr.Row():
        question = gr.Textbox(label="Ask a Question")
    with gr.Row():
        answer = gr.Textbox(label="Answer", interactive=False)
    submit = gr.Button("Submit")
    # Inputs are ordered to match rag_bot(question, pdf_path).
    submit.click(fn=rag_bot, inputs=[question, pdf_file], outputs=answer)

if __name__ == "__main__":
    # 0.0.0.0 binds all interfaces so the app is reachable from outside
    # a container; removed the stray "|" artifact that broke this line.
    demo.launch(server_name="0.0.0.0", server_port=7860)