# ============================================================
# RAG Chatbot — Hugging Face Spaces
# Upload PDFs and ask questions!
# ============================================================
import os
import warnings

# Silence library warnings before the heavy third-party imports below run,
# so import-time deprecation noise does not flood the Space logs.
warnings.filterwarnings("ignore")

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
import gradio as gr

# API Key from HF Secrets
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")


# ── Load PDFs ─────────────────────────────────────────────────
def load_pdfs(files):
    """Load every page of each uploaded PDF.

    Loading is best-effort: a file that fails to load is logged and skipped
    so one bad upload does not abort the whole batch.

    Args:
        files: Iterable of uploads. Each item is either an object exposing
            the file path as ``.name`` or a plain path string.

    Returns:
        A ``(all_docs, names)`` tuple: the page documents of every file that
        loaded (with ``metadata["source"]`` set to the file's base name) and
        the base names of those files.
    """
    all_docs = []
    names = []
    for file in files:
        # Accept both upload objects (path in ``.name``) and bare path strings.
        path = getattr(file, "name", file)
        try:
            name = os.path.basename(path)
            docs = PyPDFLoader(path).load()
            for doc in docs:
                # Show the short file name in citations, not the temp path.
                doc.metadata["source"] = name
            all_docs.extend(docs)
            names.append(name)
            print(f" ✅ {name} — {len(docs)} pages")
        except Exception as e:
            # Deliberate best-effort: report which file failed and continue.
            print(f" ❌ Error loading {path!r}: {e}")
    return all_docs, names


# ── Build RAG ─────────────────────────────────────────────────
def build_rag(all_docs):
    """Build the retrieval-augmented chat chain over ``all_docs``.

    Splits the documents into chunks, embeds them into a FAISS index and
    wires a retriever, prompt, Groq chat model and string output parser
    into one runnable.

    Args:
        all_docs: Page documents as returned by ``load_pdfs``.

    Returns:
        A ``(chain, n_chunks)`` tuple. ``chain.invoke`` expects a dict with
        the keys ``"question"`` and ``"chat_history"`` and returns the answer
        as a string.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is empty.
    """
    # Fail fast: without a key every later question would fail, so do not
    # spend time embedding the documents first.
    if not GROQ_API_KEY:
        raise ValueError(
            "GROQ_API_KEY is not set. Add it to the Space secrets."
        )

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
        separators=["\n\n", "\n", ". ", " ", ""],
    ).split_documents(all_docs)
    print(f" ✂️ {len(chunks)} chunks")

    emb = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vs = FAISS.from_documents(chunks, emb)

    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1500,
    )
    retriever = vs.as_retriever(search_kwargs={"k": 4})

    # Text kept identical to the collapsed source; only the Python line
    # wrapping differs.
    system_prompt = (
        "You are an expert AI assistant. Answer using ONLY the context below. "
        "Always mention the source document. If answer not found, say: "
        "I don't have that information in the provided documents. "
        "Context: {context}"
    )
    prompt = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    def fmt(docs):
        """Join retrieved chunks into one context string with source tags."""
        return "\n\n---\n\n".join(
            # Stored page numbers are shown one higher, i.e. 1-based.
            f"[Source: {d.metadata.get('source', '?')} | "
            f"Page {d.metadata.get('page', 0) + 1}]:\n{d.page_content}"
            for d in docs
        )

    chain = (
        RunnablePassthrough.assign(
            context=RunnableLambda(
                lambda x: fmt(retriever.invoke(x["question"]))
            )
        )
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain, len(chunks)


# ── Global State ──────────────────────────────────────────────
# NOTE: both values are module-level, so they are shared by everything
# running in this process rather than kept per browser session.
rag_chain = None
ui_history = []


# ── Gradio Functions ──────────────────────────────────────────
def process_files(files):
    """Index the uploaded PDFs and install the resulting chain.

    Args:
        files: Uploads from the file input, or ``None``/empty.

    Returns:
        A ``(status_markdown, badge_text)`` tuple for the two status outputs.
        ``badge_text`` is ``""`` whenever processing did not succeed.
    """
    global rag_chain, ui_history
    if not files:
        return "⚠️ Koi file select nahi ki!", ""

    print(f"\n📁 Processing {len(files)} file(s)...")
    docs, names = load_pdfs(files)
    if not docs:
        return "❌ PDFs se content extract nahi hua!", ""

    try:
        chain, n_chunks = build_rag(docs)
    except Exception as e:
        return f"❌ Error: {str(e)}", ""

    rag_chain = chain
    # The index was replaced, so earlier turns refer to documents that are
    # no longer loaded; drop them instead of sending them to the model.
    ui_history = []

    chars = sum(len(d.page_content) for d in docs)
    names_list = "\n".join(f"• {n}" for n in names)
    return (
        f"✅ **{len(names)} file(s) loaded!**\n\n{names_list}\n\n"
        f"📊 {len(docs)} pages | {n_chunks} chunks | {chars:,} chars\n\n"
        f"💬 **Ab sawal poochho!**"
    ), f"{len(names)} docs"


def chat_fn(msg, history):
    """Answer one user message and append the exchange to the chat log.

    Args:
        msg: Text from the input box.
        history: Chatbot value, a list of ``{"role", "content"}`` dicts
            (``None`` is treated as empty).

    Returns:
        ``("", history)``: an empty string to clear the input box and the
        updated chatbot history.
    """
    global rag_chain, ui_history
    history = history if history is not None else []
    if not msg or not msg.strip():
        return "", history

    if rag_chain is None:
        history.append({
            "role": "assistant",
            "content": "⚠️ Pehle PDF upload karo aur Process karo!",
        })
        return "", history

    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": ui_history,
        })
        # Only successful exchanges are remembered; failed calls are not
        # added to the model-side history.
        ui_history.append(HumanMessage(content=msg))
        ui_history.append(AIMessage(content=ans))
    except Exception as e:
        ans = f"❌ Error: {str(e)}"
        print(f"ERROR: {e}")

    history.append({"role": "user", "content": msg})
    history.append({"role": "assistant", "content": ans})
    return "", history


def clear_fn():
    """Forget the model-side history and return an empty chatbot value."""
    global ui_history
    ui_history = []
    return []


# ── CSS ───────────────────────────────────────────────────────
css = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@700;800&family=DM+Sans:wght@300;400;500&display=swap');

* { box-sizing: border-box; }

body, .gradio-container {
    font-family: 'DM Sans', sans-serif !important;
    background: #0a0a0f !important;
    color: #e8e6f0 !important;
}

.gradio-container {
    max-width: 960px !important;
    margin: 0 auto !important;
}

.app-title {
    font-family: 'Syne', sans-serif !important;
    font-size: 2.4rem !important;
    font-weight: 800 !important;
    background: linear-gradient(135deg, #a78bfa, #60a5fa, #34d399) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    padding: 32px 0 8px !important;
}

.badge {
    display: inline-flex;
    align-items: center;
    gap: 5px;
    background: rgba(139,92,246,0.1);
    border: 1px solid rgba(139,92,246,0.25);
    border-radius: 20px;
    padding: 4px 12px;
    font-size: 0.72rem;
    color: #a78bfa;
    font-weight: 500;
    margin: 3px;
}

.section-label {
    font-family: 'Syne', sans-serif !important;
    font-size: 0.7rem !important;
    font-weight: 700 !important;
    letter-spacing: 2.5px !important;
    text-transform: uppercase !important;
    color: #a78bfa !important;
    margin: 20px 0 12px !important;
}

textarea, input[type=text] {
    background: #0d0d14 !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 10px !important;
    color: #e8e6f0 !important;
    font-family: 'DM Sans', sans-serif !important;
    font-size: 0.9rem !important;
    transition: border-color 0.2s, box-shadow 0.2s !important;
    scrollbar-width: thin !important;
    scrollbar-color: #2d2d45 transparent !important;
}

textarea:focus, input[type=text]:focus {
    border-color: #a78bfa !important;
    box-shadow: 0 0 0 3px rgba(139,92,246,0.12) !important;
    outline: none !important;
}

textarea::-webkit-scrollbar { width: 4px !important; }

textarea::-webkit-scrollbar-thumb {
    background: #2d2d45 !important;
    border-radius: 10px !important;
}

textarea::-webkit-scrollbar-thumb:hover { background: #a78bfa !important; }

button.primary {
    background: linear-gradient(135deg, #7c3aed, #4f46e5) !important;
    border: none !important;
    border-radius: 10px !important;
    color: white !important;
    font-family: 'Syne', sans-serif !important;
    font-weight: 600 !important;
    box-shadow: 0 4px 15px rgba(124,58,237,0.3) !important;
    transition: all 0.2s ease !important;
}

button.primary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 6px 20px rgba(124,58,237,0.4) !important;
}

button.secondary {
    background: #13131a !important;
    border: 1px solid #2d2d45 !important;
    border-radius: 10px !important;
    color: #9ca3af !important;
    transition: all 0.2s !important;
}

button.secondary:hover {
    border-color: #a78bfa !important;
    color: #a78bfa !important;
}

label span {
    color: #6b7280 !important;
    font-size: 0.8rem !important;
}

.examples-table td, .examples td {
    background: #13131a !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 8px !important;
    color: #9ca3af !important;
    font-size: 0.8rem !important;
    cursor: pointer !important;
    transition: all 0.2s !important;
}

.examples-table td:hover, .examples td:hover {
    background: #1e1e30 !important;
    color: #a78bfa !important;
    border-color: #a78bfa !important;
}
"""

# ── UI ────────────────────────────────────────────────────────
# NOTE(review): the tags inside the gr.HTML(...) strings were missing from
# the source, which left raw newlines inside single-line string literals
# (a syntax error). The markup below is a reconstruction that uses only the
# classes defined in ``css`` (.app-title, .badge, .section-label); all
# visible text is kept. Confirm against the intended design.
with gr.Blocks(
    css=css,
    title="RAG Intelligence",
    theme=gr.themes.Base(
        primary_hue="violet",
        neutral_hue="slate",
    ),
) as demo:

    gr.HTML("""
<div class="app-title">⚡ RAG Intelligence</div>
<div>Multi-Document AI · FAISS · Groq LLaMA 3.3</div>
<div>
  <span class="badge">🧠 HuggingFace</span>
  <span class="badge">⚡ Groq LLM</span>
  <span class="badge">🔍 FAISS</span>
  <span class="badge">📄 Multi-PDF</span>
</div>
""")

    gr.HTML('<div class="section-label">📥 &nbsp; Upload Your PDFs</div>')

    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF files select karo (multiple ho sakti hain)",
                file_types=[".pdf"],
                file_count="multiple",
            )
            process_btn = gr.Button(
                "⚙️ Process Documents",
                variant="primary",
            )
        with gr.Column(scale=2):
            status_out = gr.Markdown(
                "📋 **Status:** Waiting for documents..."
            )
            badge_out = gr.Markdown("**0 docs loaded**")

    # Spacer element; its original markup is unknown, so only the surviving
    # content (a newline) is kept.
    gr.HTML("\n")

    gr.HTML('<div class="section-label">💬 &nbsp; Chat With Documents</div>')

    chatbot = gr.Chatbot(
        label="",
        height=480,
        type="messages",
        show_label=False,
        placeholder="\nLoad documents first, then ask anything! ✦\n",
    )

    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="✦ Apne documents ke baare mein kuch bhi poochho...",
            label="",
            lines=2,
            max_lines=5,
            scale=5,
            show_label=False,
            container=False,
        )
        with gr.Column(scale=1, min_width=110):
            send_btn = gr.Button("Send ➤", variant="primary")
            clear_btn = gr.Button("Clear 🗑", variant="secondary")

    gr.Examples(
        examples=[
            "Is document ka summary do",
            "Main topics kya hain?",
            "Important points bullet mein batao",
            "Koi definition explain karo",
            "Key concepts list karo",
        ],
        inputs=msg_box,
        label="✦ Quick Questions",
    )

    gr.HTML("""
RAG INTELLIGENCE · FAISS · GROQ · HUGGINGFACE
""")

    # Events
    process_btn.click(
        fn=process_files,
        inputs=[file_input],
        outputs=[status_out, badge_out],
    )
    # The Send button and pressing Enter in the textbox run the same handler.
    send_btn.click(
        fn=chat_fn,
        inputs=[msg_box, chatbot],
        outputs=[msg_box, chatbot],
    )
    msg_box.submit(
        fn=chat_fn,
        inputs=[msg_box, chatbot],
        outputs=[msg_box, chatbot],
    )
    clear_btn.click(fn=clear_fn, outputs=[chatbot])

if __name__ == "__main__":
    demo.launch()