# ============================================================
# RAG Chatbot — Hugging Face Spaces Deployment
# app.py
# ============================================================

import functools
import os
import shutil
import warnings

# Silence library warnings before the heavy third-party imports run.
warnings.filterwarnings("ignore")

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
import gradio as gr

# API key — read from the environment (e.g. a Hugging Face Space secret).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Folder that holds the currently uploaded documents.
UPLOAD_DIR = "uploaded_docs"

# Upper bound on remembered chat messages (human + AI). Kept even so that
# trimming never separates a question from its answer.
MAX_HISTORY_MESSAGES = 20


# ── Document Load Function ────────────────────────────────────
def load_docs(folder="uploaded_docs"):
    """Load every PDF and TXT file found directly inside *folder*.

    The folder is created when it does not exist, in which case an empty
    list is returned. Files that fail to load are reported on stdout and
    skipped so that one bad file does not abort the whole batch.

    Args:
        folder: Directory to scan (non-recursive).

    Returns:
        A list with the documents produced by the loaders.
    """
    all_docs = []
    if not os.path.exists(folder):
        os.makedirs(folder)
        return all_docs

    # Sorted so that the document (and therefore chunk) order is stable.
    for fname in sorted(os.listdir(folder)):
        fpath = os.path.join(folder, fname)
        # Compare the extension case-insensitively: "REPORT.PDF" is a PDF too.
        suffix = os.path.splitext(fname)[1].lower()
        try:
            if suffix == ".pdf":
                docs = PyPDFLoader(fpath).load()
                all_docs.extend(docs)
                print(f" ✅ PDF: {fname} — {len(docs)} pages")
            elif suffix == ".txt":
                docs = TextLoader(fpath, encoding="utf-8").load()
                all_docs.extend(docs)
                print(f" ✅ TXT: {fname}")
        except Exception as e:
            # Deliberate best-effort: report the file and keep going.
            print(f" ❌ {fname}: {e}")
    return all_docs


# ── RAG Build Function ────────────────────────────────────────
@functools.lru_cache(maxsize=1)
def _get_embeddings():
    """Return the embedding model, constructing it only on the first call."""
    print("⚙️ Loading embedding model...")
    return HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )


def _format_chunks(docs):
    """Join retrieved documents into one numbered context string."""
    return "\n\n---\n\n".join(
        f"[Chunk {i + 1}]:\n{d.page_content}" for i, d in enumerate(docs)
    )


def build_rag(docs):
    """Build the retrieval-augmented chain for *docs*.

    The documents are split into chunks, indexed in a FAISS vector store and
    wired to a Groq chat model. The returned chain expects a dict with the
    keys ``"question"`` and ``"chat_history"`` and yields the answer string.

    Args:
        docs: Documents as returned by ``load_docs``.

    Returns:
        The runnable chain.

    Raises:
        ValueError: If ``GROQ_API_KEY`` is not set or the documents contain
            no text to index.
    """
    # Fail before the expensive embedding/indexing work, not after it.
    if not GROQ_API_KEY:
        raise ValueError("GROQ_API_KEY is not set; add it to the environment.")

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=80,
        separators=["\n\n", "\n", ". ", " ", ""],
    ).split_documents(docs)
    if not chunks:
        # E.g. scanned PDFs without a text layer; an empty index cannot be built.
        raise ValueError("No extractable text found in the uploaded documents.")
    print(f"✅ {len(chunks)} chunks")

    vs = FAISS.from_documents(chunks, _get_embeddings())
    print("✅ FAISS ready")

    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1024,
    )
    print("✅ Groq connected")

    retriever = vs.as_retriever(search_kwargs={"k": 3})

    prompt = ChatPromptTemplate.from_messages([
        (
            "system",
            "You are a helpful AI assistant. "
            "Answer questions using ONLY the context provided below. "
            "If the answer is not in the context, say: "
            "I don't have that information in the provided documents. "
            "Context: {context}",
        ),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    chain = (
        RunnablePassthrough.assign(
            # Retrieve for the current question and inject it as "context".
            context=RunnableLambda(
                lambda x: _format_chunks(retriever.invoke(x["question"]))
            )
        )
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain


# ── Global State ──────────────────────────────────────────────
# NOTE: module-level state is shared by every visitor of the app.
rag_chain = None
ui_history = []


# ── Gradio Functions ──────────────────────────────────────────
def process_files(files):
    """Replace the uploaded documents and rebuild the RAG chain.

    Args:
        files: Uploaded files from the ``gr.File`` input; each item is either
            a path string or an object exposing the path as ``.name``.

    Returns:
        A status message for the status box.
    """
    global rag_chain, ui_history
    if not files:
        return "⚠️ Koi file select nahi ki!"

    try:
        os.makedirs(UPLOAD_DIR, exist_ok=True)
        # Drop the previous upload; only regular files, os.remove fails on dirs.
        for stale in os.listdir(UPLOAD_DIR):
            stale_path = os.path.join(UPLOAD_DIR, stale)
            if os.path.isfile(stale_path):
                os.remove(stale_path)

        names = []
        for file in files:
            src = file if isinstance(file, (str, os.PathLike)) else file.name
            name = os.path.basename(os.fspath(src))
            # Streamed copy instead of reading the whole file into memory.
            shutil.copyfile(src, os.path.join(UPLOAD_DIR, name))
            names.append(name)
    except OSError as e:
        return f"❌ Error: {str(e)}"

    docs = load_docs(UPLOAD_DIR)
    if not docs:
        return "❌ Documents load nahi hue!"

    try:
        rag_chain = build_rag(docs)
    except Exception as e:
        # UI boundary: surface the failure in the status box.
        return f"❌ Error: {str(e)}"

    # The remembered conversation was about the previous documents.
    ui_history = []
    chars = sum(len(d.page_content) for d in docs)
    return (
        f"✅ Ready! Files: {', '.join(names)} | Pages: {len(docs)} | "
        f"Characters: {chars:,}\n\n💬 Ab neeche sawal poochho!"
    )


def chat_fn(msg, history):
    """Answer *msg* with the current chain and extend the chat display.

    Args:
        msg: Text from the question box.
        history: Current chatbot value as a list of (user, bot) pairs.

    Returns:
        A tuple ``("", new_history)``: the empty string clears the text box.
    """
    history = history or []
    if not msg or not msg.strip():
        return "", history
    if rag_chain is None:
        # Keep the user's message visible next to the warning.
        return "", history + [(msg, "⚠️ Pehle PDF upload karo aur Process karo!")]

    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": ui_history,
        })
        ui_history.append(HumanMessage(content=msg))
        ui_history.append(AIMessage(content=ans))
        # Bound the memory so the prompt cannot grow without limit.
        if len(ui_history) > MAX_HISTORY_MESSAGES:
            del ui_history[:-MAX_HISTORY_MESSAGES]
    except Exception as e:
        # UI boundary: show the error in the chat instead of crashing.
        ans = f"❌ Error: {str(e)}"
        print(f"ERROR: {e}")
    return "", history + [(msg, ans)]


def clear_fn():
    """Forget the conversation and return an empty chatbot value."""
    global ui_history
    ui_history = []
    return []


# ── Gradio UI ─────────────────────────────────────────────────
with gr.Blocks(theme=gr.themes.Soft(), title="RAG Chatbot") as demo:
    # Explicit line breaks: heading, stack line and quote are separate blocks.
    gr.Markdown(
        "# 🤖 RAG Chatbot — PDF Support\n"
        "**Stack:** FAISS · HuggingFace Embeddings · Groq LLaMA 3.3\n\n"
        "> Apni PDF upload karo aur sawal poochho!"
    )

    gr.Markdown("### 📤 Step 1: PDF Upload Karo")
    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF / TXT files select karo",
                file_types=[".pdf", ".txt"],
                file_count="multiple",
            )
            process_btn = gr.Button("⚙️ Process Documents", variant="primary")
        with gr.Column(scale=2):
            status_box = gr.Markdown("📋 Status: Waiting for upload...")

    gr.Markdown("---\n### 💬 Step 2: Sawal Poochho")
    chatbot = gr.Chatbot(label="Chat", height=450)
    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="PDF ke baare mein sawal poochho...",
            label="Sawal",
            scale=5,
        )
        send_btn = gr.Button("Send 🚀", variant="primary", scale=1)
    clear_btn = gr.Button("🗑️ Clear Chat", variant="secondary")

    gr.Examples(
        examples=[
            "Is document ka summary do",
            "Main topic kya hai?",
            "Important points batao",
            "Koi specific cheez explain karo",
        ],
        inputs=msg_box,
        label="💡 Example Sawaal:",
    )
    gr.Markdown(
        "---\n💬 **Tip:** PDF process hone ke baad sawal poochho. "
        "Clear se chat reset hoga."
    )

    process_btn.click(fn=process_files, inputs=[file_input], outputs=[status_box])
    send_btn.click(fn=chat_fn, inputs=[msg_box, chatbot], outputs=[msg_box, chatbot])
    msg_box.submit(fn=chat_fn, inputs=[msg_box, chatbot], outputs=[msg_box, chatbot])
    clear_btn.click(fn=clear_fn, outputs=[chatbot])


if __name__ == "__main__":
    demo.launch()