# chatbotrag / app.py
# imeesam's picture
# Create app.py
# 78e71e1 verified
import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# ───────────────────────── CONFIG ─────────────────────────
# Sentence-embedding model name, Groq chat model name, and the number of
# chunks the retriever returns per question.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
GROQ_MODEL = "llama-3.1-8b-instant"
TOP_K = 3

# os.environ only accepts str values, so writing os.getenv(...) straight back
# raises TypeError at import time whenever the variable is unset. Re-export
# the key only when it is present, and warn instead of crashing so the UI can
# still start and the problem is visible in the logs.
_groq_api_key = os.getenv("GROQ_API_KEY")
if _groq_api_key is not None:
    os.environ["GROQ_API_KEY"] = _groq_api_key
else:
    print("WARNING: GROQ_API_KEY is not set; LLM calls will fail until it is configured.")
# ───────────────────────── INIT MODELS ─────────────────────────
# Shared embedding model, created once at import time and reused for every
# uploaded PDF. It is pinned to the CPU and asked to normalize its output
# vectors.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBED_MODEL,
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=True),
)
def create_llm():
    """Build the Groq chat model used to answer questions.

    Returns:
        A ``ChatGroq`` client for ``GROQ_MODEL`` with temperature 0.2 and a
        1024-token completion limit.

    Raises:
        RuntimeError: If ``GROQ_API_KEY`` is missing from the environment or
            contains only whitespace.
    """
    # Strip stray whitespace/newlines that commonly sneak in when a secret is
    # pasted, and fail with an actionable message instead of a bare KeyError.
    api_key = os.environ.get("GROQ_API_KEY", "").strip()
    if not api_key:
        raise RuntimeError(
            "GROQ_API_KEY is not set. Add it to the environment "
            "(or the Space secrets) before processing a PDF."
        )

    return ChatGroq(
        model=GROQ_MODEL,
        temperature=0.2,
        max_tokens=1024,
        groq_api_key=api_key,
    )
# Prompt text for the RAG chain. It exposes two placeholders, {context} and
# {question}, and instructs the model to answer only from the given context.
_RAG_TEMPLATE = """
You are a helpful assistant.
Answer ONLY using the context below.
If not found, say you don't have enough information.
Context:
{context}
Question: {question}
Answer:
"""

RAG_PROMPT = ChatPromptTemplate.from_template(_RAG_TEMPLATE)
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Takes the ``page_content`` of every item in ``docs``, in order, and joins
    the pieces with a blank line between them. An empty input yields ``""``.
    """
    separator = "\n\n"
    page_texts = [doc.page_content for doc in docs]
    return separator.join(page_texts)
# ───────────────────────── GLOBAL STATE ─────────────────────────
# Module-wide state shared by the UI callbacks. Both names stay None until
# process_pdf() has indexed a document; chat() checks rag_chain before use.
vectorstore = rag_chain = None
# ───────────────────────── PROCESS PDF ─────────────────────────
def process_pdf(file):
    """Index an uploaded PDF and (re)build the retrieval chain.

    Loads the PDF, splits it into overlapping chunks, adds them to the shared
    FAISS store (created on the first upload, extended on later ones) and
    rebuilds the global ``rag_chain`` on top of that store.

    Args:
        file: The uploaded file: either a filesystem path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` means nothing has been uploaded yet.

    Returns:
        A human-readable status message for the status textbox.
    """
    global vectorstore, rag_chain

    if file is None:
        return "Upload a PDF first."

    # Accept both shapes an upload can arrive in: a plain path, or a
    # tempfile-like wrapper whose .name holds the path.
    # NOTE(review): which one Gradio passes depends on its version/config.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = file.name

    # Load
    docs = PyPDFLoader(path).load()

    # Split
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=50,
    )
    chunks = splitter.split_documents(docs)

    # A PDF without extractable text (e.g. scanned pages) yields no chunks;
    # building a FAISS index from an empty list fails, so stop here and leave
    # any previously indexed documents untouched.
    if not chunks:
        return "⚠️ No extractable text found in this PDF (it may be scanned or empty)."

    # Vector store: create on first upload, append afterwards.
    if vectorstore is None:
        vectorstore = FAISS.from_documents(chunks, embeddings)
    else:
        vectorstore.add_documents(chunks)

    retriever = vectorstore.as_retriever(search_kwargs={"k": TOP_K})
    llm = create_llm()

    # The incoming question is sent to the retriever (whose hits are flattened
    # by format_docs) and also passed through unchanged as {question}.
    rag_chain = (
        {
            "context": retriever | format_docs,
            "question": RunnablePassthrough(),
        }
        | RAG_PROMPT
        | llm
        | StrOutputParser()
    )

    return f"✅ PDF processed successfully!\nChunks: {len(chunks)}"
# ───────────────────────── CHAT FUNCTION ─────────────────────────
def chat(message, history):
    """Answer a question about the indexed PDF and extend the chat history.

    Args:
        message: The question typed by the user.
        history: Existing conversation as a list of
            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated as an
            empty conversation.

    Returns:
        A ``("", updated_history)`` tuple. The first element is the new value
        for the question box (emptied after each turn); the second is the
        conversation including this turn.
    """
    # Work on a copy so the caller's list is never mutated, and tolerate None.
    history = list(history or [])

    # Nothing to ask: don't spend an LLM call on a blank submission.
    if not message or not message.strip():
        return "", history

    if rag_chain is None:
        reply = "Please upload a PDF first."
    else:
        reply = rag_chain.invoke(message)

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": reply})
    return "", history
# ───────────────────────── UI ─────────────────────────
# Gradio UI: an upload row, a status box, and a chat area.
# NOTE(review): the original indentation was lost; the grouping of the file
# picker and button inside the Row is reconstructed — confirm the layout.
with gr.Blocks(title="RAG Chatbot") as demo:
    gr.Markdown("## 📄 PDF RAG Chatbot (Groq + FAISS + LangChain)")

    with gr.Row():
        # Only offer PDFs in the picker, since the handler uses a PDF loader.
        file = gr.File(label="Upload PDF", file_types=[".pdf"])
        upload_btn = gr.Button("Process PDF")

    status = gr.Textbox(label="Status")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Ask a question")

    # Indexing is triggered explicitly by the button; questions are sent when
    # the user submits the textbox, which is also emptied by chat()'s result.
    upload_btn.click(process_pdf, inputs=file, outputs=status)
    msg.submit(chat, inputs=[msg, chatbot], outputs=[msg, chatbot])

# Start the server only when run as a script, so importing this module
# (e.g. from tests or tooling) does not launch a web server.
if __name__ == "__main__":
    demo.launch()