rag-chatbot / app.py
ShahbazAhmad-Lab's picture
Create app.py
ce13a0a verified
# ============================================================
# RAG Chatbot — Hugging Face Spaces Deployment
# app.py
# ============================================================
import os, warnings
warnings.filterwarnings("ignore")
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
import gradio as gr
# API key — injected automatically from the Hugging Face Spaces secrets.
# Resolves to None when the GROQ_API_KEY environment variable is not set.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# ── Document Load Function ────────────────────────────────────
def load_docs(folder="uploaded_docs"):
    """Load every PDF and plain-text file found directly inside ``folder``.

    When ``folder`` does not exist yet it is created and an empty list is
    returned. Entries are visited in sorted name order so the result is
    reproducible across runs, and extensions are matched case-insensitively
    so ``REPORT.PDF`` is loaded just like ``report.pdf``. Entries with any
    other extension are ignored.

    Loading is best-effort: a file that fails to load is reported on stdout
    and skipped, so one broken upload does not discard the others.

    Args:
        folder: Directory to scan (not recursive).

    Returns:
        A flat list with everything the loaders returned, in file order;
        empty when nothing could be loaded.
    """
    all_docs = []
    if not os.path.exists(folder):
        os.makedirs(folder)
        return all_docs

    # os.listdir() returns entries in arbitrary order; sort for determinism.
    for fname in sorted(os.listdir(folder)):
        fpath = os.path.join(folder, fname)
        # Compare on a lowercased name so ".PDF" / ".Txt" are not skipped.
        lowered = fname.lower()
        try:
            if lowered.endswith(".pdf"):
                docs = PyPDFLoader(fpath).load()
                all_docs.extend(docs)
                print(f" ✅ PDF: {fname} — {len(docs)} pages")
            elif lowered.endswith(".txt"):
                docs = TextLoader(fpath, encoding="utf-8").load()
                all_docs.extend(docs)
                print(f" ✅ TXT: {fname}")
        except Exception as e:
            # Deliberate best-effort boundary: report the file and move on.
            print(f" ❌ {fname}: {e}")
    return all_docs
# ── RAG Build Function ────────────────────────────────────────
def build_rag(docs):
    """Build the retrieval-augmented generation chain for ``docs``.

    Pipeline: split the documents into overlapping chunks, embed them with
    the ``all-MiniLM-L6-v2`` model on CPU, index the vectors in FAISS, then
    wire retriever -> prompt -> Groq chat model -> string output parser.

    Args:
        docs: Loaded documents (objects exposing ``page_content``), e.g. the
            list returned by ``load_docs``.

    Returns:
        A runnable chain. Invoke it with a dict holding ``"question"`` (the
        user's text) and ``"chat_history"`` (a list of prior messages); the
        output passes through ``StrOutputParser``.

    Raises:
        ValueError: If splitting yields no chunks (for example documents
            whose pages contain no extractable text), because there would be
            nothing to index.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=80,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = splitter.split_documents(docs)
    if not chunks:
        # Fail early with a readable message instead of letting the
        # vector-store build fail later on an empty input.
        raise ValueError(
            "No text chunks could be produced from the documents; "
            "nothing to index."
        )
    print(f"✅ {len(chunks)} chunks")

    print("⚙️ Loading embedding model...")
    emb = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vs = FAISS.from_documents(chunks, emb)
    print("✅ FAISS ready")

    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1024,
    )
    print("✅ Groq connected")

    # Only the 3 best-matching chunks are placed into the prompt.
    retriever = vs.as_retriever(search_kwargs={"k": 3})

    system_text = (
        "You are a helpful AI assistant.\n"
        "Answer questions using ONLY the context provided below.\n"
        "If the answer is not in the context, say:\n"
        "I don't have that information in the provided documents.\n"
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_text),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    def fmt(docs):
        """Join retrieved chunks into one numbered, separator-delimited string."""
        return "\n\n---\n\n".join(
            f"[Chunk {i+1}]:\n{d.page_content}"
            for i, d in enumerate(docs)
        )

    # The "context" key is computed from the question on every call and added
    # next to the caller-supplied "question" / "chat_history" keys.
    chain = (
        RunnablePassthrough.assign(
            context=RunnableLambda(
                lambda x: fmt(retriever.invoke(x["question"]))
            )
        )
        | prompt | llm | StrOutputParser()
    )
    return chain
# ── Global State ──────────────────────────────────────────────
# Chain returned by build_rag(); stays None until process_files() succeeds.
rag_chain = None
# Message objects of the running conversation (HumanMessage / AIMessage),
# passed to the chain as "chat_history" and reset by clear_fn().
ui_history = []
# ── Gradio Functions ──────────────────────────────────────────
def process_files(files):
    """Store the uploaded files, load them and (re)build the global RAG chain.

    Previously stored files in ``uploaded_docs`` are removed first, so only
    the current upload is indexed. On success the module-level ``rag_chain``
    is replaced with a chain built from the new documents.

    Args:
        files: The uploads handed over by the Gradio file component. Each
            item may be a plain path string or an object exposing the path
            as ``.name``. ``None`` or an empty list means nothing selected.

    Returns:
        A status message for the UI: a warning when nothing was selected, an
        error when no document could be loaded or the chain build failed,
        otherwise a summary of the processed files.
    """
    global rag_chain
    if not files:
        return "⚠️ Koi file select nahi ki!"

    # Function-scope import keeps this block self-contained.
    import shutil

    upload_dir = "uploaded_docs"
    os.makedirs(upload_dir, exist_ok=True)

    # Clear earlier uploads; skip anything that is not a regular file so a
    # stray sub-directory cannot abort the whole request.
    for stale in os.listdir(upload_dir):
        stale_path = os.path.join(upload_dir, stale)
        if os.path.isfile(stale_path):
            os.remove(stale_path)

    names = []
    for file in files:
        # Accept both path strings and objects carrying the path in `.name`.
        src = getattr(file, "name", file)
        name = os.path.basename(src)
        # Stream the copy instead of reading the whole upload into memory.
        shutil.copyfile(src, os.path.join(upload_dir, name))
        names.append(name)

    docs = load_docs(upload_dir)
    if not docs:
        return "❌ Documents load nahi hue!"

    try:
        rag_chain = build_rag(docs)
        chars = sum(len(d.page_content) for d in docs)
        return (
            f"✅ Ready! Files: {', '.join(names)} | Pages: {len(docs)} | "
            f"Characters: {chars:,}\n\n💬 Ab neeche sawal poochho!"
        )
    except Exception as e:
        # UI boundary: surface the failure as a status message.
        return f"❌ Error: {str(e)}"
def chat_fn(msg, history):
    """Answer one user message with the RAG chain and extend the chat log.

    Args:
        msg: Text typed by the user. ``None``, empty or whitespace-only input
            is ignored.
        history: The chatbot component's current value, a list of
            ``(user, assistant)`` pairs; ``None`` is treated as empty when a
            new pair has to be appended.

    Returns:
        A ``(textbox_value, chat_history)`` pair: the textbox is always
        cleared (``""``) and the history gains one pair unless the message
        was blank.
    """
    # NOTE(review): rag_chain and ui_history are module-level globals, so the
    # conversation memory is not kept per caller.
    global rag_chain, ui_history
    if not msg or not msg.strip():
        return "", history

    shown = history or []
    if rag_chain is None:
        return "", shown + [("", "⚠️ Pehle PDF upload karo aur Process karo!")]

    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": ui_history,
        })
        # Record the turn only after a successful answer, so failed calls do
        # not leave a dangling question in the model-side history.
        ui_history.append(HumanMessage(content=msg))
        ui_history.append(AIMessage(content=ans))
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        ans = f"❌ Error: {str(e)}"
        print(f"ERROR: {e}")
    return "", shown + [(msg, ans)]
def clear_fn():
    """Forget the conversation and hand back an empty chat log.

    Rebinds the module-level ``ui_history`` to a fresh empty list and returns
    a separate empty list for the component wired as this callback's output.
    """
    global ui_history
    ui_history, emptied_chat = [], []
    return emptied_chat
# ── Gradio UI ─────────────────────────────────────────────────
# Page layout: header, upload/processing panel (step 1), chat panel (step 2),
# example questions, then the event wiring.
with gr.Blocks(theme=gr.themes.Soft(), title="RAG Chatbot") as demo:
    # Built from explicit lines so the Markdown does not depend on how the
    # source file happens to be indented.
    gr.Markdown(
        "# 🤖 RAG Chatbot — PDF Support\n"
        "**Stack:** FAISS · HuggingFace Embeddings · Groq LLaMA 3.3\n"
        "> Apni PDF upload karo aur sawal poochho!"
    )

    # Step 1: upload and index documents.
    gr.Markdown("### 📤 Step 1: PDF Upload Karo")
    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF / TXT files select karo",
                file_types=[".pdf", ".txt"],
                file_count="multiple",
            )
            process_btn = gr.Button("⚙️ Process Documents", variant="primary")
        with gr.Column(scale=2):
            status_box = gr.Markdown("📋 Status: Waiting for upload...")

    # Step 2: chat about the indexed documents.
    gr.Markdown("---\n### 💬 Step 2: Sawal Poochho")
    chatbot = gr.Chatbot(label="Chat", height=450)
    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="PDF ke baare mein sawal poochho...",
            label="Sawal",
            scale=5,
        )
        send_btn = gr.Button("Send 🚀", variant="primary", scale=1)
    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Is document ka summary do",
            "Main topic kya hai?",
            "Important points batao",
            "Koi specific cheez explain karo",
        ],
        inputs=msg_box,
        label="💡 Example Sawaal:",
    )
    gr.Markdown(
        "---\n💬 **Tip:** PDF process hone ke baad sawal poochho. "
        "Clear se chat reset hoga."
    )

    # Event wiring: the Send button and pressing Enter in the textbox both
    # call chat_fn, which clears the textbox and updates the chat log.
    process_btn.click(fn=process_files, inputs=[file_input], outputs=[status_box])
    send_btn.click(fn=chat_fn, inputs=[msg_box, chatbot], outputs=[msg_box, chatbot])
    msg_box.submit(fn=chat_fn, inputs=[msg_box, chatbot], outputs=[msg_box, chatbot])
    clear_btn.click(fn=clear_fn, outputs=[chatbot])
# Launch the Gradio app only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()