Spaces:
Sleeping
Sleeping
# ============================================================
# RAG Chatbot — Hugging Face Spaces
# Upload PDFs and ask questions!
# ============================================================
| import os, warnings | |
| warnings.filterwarnings("ignore") | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_groq import ChatGroq | |
| from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain_core.messages import HumanMessage, AIMessage | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough, RunnableLambda | |
| import gradio as gr | |
# Groq API key, read from the environment (configured as a Hugging Face
# Spaces secret). Falls back to an empty string when the variable is unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
# ── Load PDFs ─────────────────────────────────────────────────
def load_pdfs(files):
    """Load every uploaded PDF into a flat list of page documents.

    Args:
        files: Iterable of uploads. Each item may be a path (``str`` or
            ``os.PathLike``) or an object exposing the path as ``.name``.
            ``None`` is treated as an empty upload.

    Returns:
        A ``(all_docs, names)`` tuple: the documents of every file that
        loaded, and the base names of those files. A file that fails to
        load is reported on stdout and skipped.
    """
    all_docs = []
    names = []
    for file in files or []:
        # Path-like items are used directly; for anything else the path is
        # expected in ``.name``. (``Path.name`` would be only the last
        # component, so path objects must not go through ``.name``.)
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = getattr(file, "name", None)
        label = os.path.basename(path) if isinstance(path, str) else repr(file)
        try:
            docs = PyPDFLoader(path).load()
        except Exception as e:
            # Best effort: one unreadable PDF must not abort the batch.
            print(f" β Error ({label}): {e}")
            continue
        # Replace the loader's source path with the bare file name.
        for doc in docs:
            doc.metadata["source"] = label
        all_docs.extend(docs)
        names.append(label)
        print(f" β {label} β {len(docs)} pages")
    return all_docs, names
# ── Build RAG ─────────────────────────────────────────────────
def build_rag(all_docs):
    """Build the retrieval-augmented generation chain for the given pages.

    The documents are split into overlapping chunks, embedded into a FAISS
    vector store, and combined with a prompt, the Groq chat model and a
    string output parser into a single runnable.

    Args:
        all_docs: Documents to index; each needs ``page_content`` and a
            ``metadata`` dict (``source`` and ``page`` are used for
            citations).

    Returns:
        A ``(chain, n_chunks)`` tuple. ``chain`` is invoked with a dict
        holding ``"question"`` and ``"chat_history"``; ``n_chunks`` is the
        number of chunks that were indexed.

    Raises:
        ValueError: If there is nothing to index or the Groq API key is
            missing.
    """
    # Fail fast with a readable message before any expensive work.
    if not all_docs:
        raise ValueError("No documents to index.")
    if not GROQ_API_KEY:
        raise ValueError(
            "GROQ_API_KEY is not set; add it to the Space secrets."
        )

    chunks = RecursiveCharacterTextSplitter(
        chunk_size=600,
        chunk_overlap=100,
        separators=["\n\n", "\n", ". ", " ", ""],
    ).split_documents(all_docs)
    # Pages without extractable text (e.g. scanned images) produce no
    # chunks, and an empty list cannot be turned into a vector index.
    if not chunks:
        raise ValueError("No extractable text found in the documents.")
    print(f" βοΈ {len(chunks)} chunks")

    emb = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vs = FAISS.from_documents(chunks, emb)
    llm = ChatGroq(
        groq_api_key=GROQ_API_KEY,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1500,
    )
    # Retrieve the 4 most similar chunks for each question.
    retriever = vs.as_retriever(search_kwargs={"k": 4})
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are an expert AI assistant.
Answer using ONLY the context below.
Always mention the source document.
If answer not found, say: I don't have that information in the provided documents.
Context:
{context}"""),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    def fmt(docs):
        # Render each chunk with a citation header; the stored page index
        # is shown 1-based.
        return "\n\n---\n\n".join(
            f"[Source: {d.metadata.get('source','?')} | Page {d.metadata.get('page',0)+1}]:\n{d.page_content}"
            for d in docs
        )

    chain = (
        # Add a "context" key built from the chunks retrieved for the
        # question, keeping the original input keys.
        RunnablePassthrough.assign(
            context=RunnableLambda(
                lambda x: fmt(retriever.invoke(x["question"]))
            )
        )
        | prompt | llm | StrOutputParser()
    )
    return chain, len(chunks)
# ── Global State ──────────────────────────────────────────────
# Module-level state, shared by everything running in this process.
# Chain returned by build_rag(); None until documents have been processed.
rag_chain = None
# Conversation so far as LangChain messages, passed to the chain as
# "chat_history".
ui_history = []
# ── Gradio Functions ──────────────────────────────────────────
def process_files(files):
    """Load the uploaded PDFs and (re)build the global RAG chain.

    Args:
        files: Uploaded files from the file input; may be empty or None.

    Returns:
        A ``(status_markdown, badge_text)`` tuple. ``badge_text`` is an
        empty string whenever processing did not succeed.
    """
    global rag_chain
    if not files:
        return "β οΈ Koi file select nahi ki!", ""
    print(f"\nπ Processing {len(files)} file(s)...")
    docs, names = load_pdfs(files)
    if not docs:
        return "β PDFs se content extract nahi hua!", ""
    try:
        chain, n_chunks = build_rag(docs)
    except Exception as e:
        # UI boundary: show the failure in the status panel instead of
        # raising. The previous chain (if any) stays active.
        return f"β Error: {e}", ""

    rag_chain = chain
    chars = sum(len(d.page_content) for d in docs)
    names_list = "\n".join(f"β’ {n}" for n in names)
    # Files that could not be read are absent from ``names``; tell the
    # user instead of dropping them silently.
    skipped = len(files) - len(names)
    skipped_note = (
        f"{skipped} file(s) skip hui (read error)\n\n" if skipped > 0 else ""
    )
    return (
        f"β **{len(names)} file(s) loaded!**\n\n{names_list}\n\n"
        f"{skipped_note}"
        f"π {len(docs)} pages | {n_chunks} chunks | {chars:,} chars\n\n"
        f"π¬ **Ab sawal poochho!**"
    ), f"{len(names)} docs"
def chat_fn(msg, history):
    """Answer one user message and append the exchange to the transcript.

    Args:
        msg: Text from the input box. ``None`` or blank input is ignored.
        history: Chatbot transcript as a list of
            ``{"role": ..., "content": ...}`` dicts; ``None`` is treated
            as an empty transcript.

    Returns:
        ``("", history)``: an empty string for the input box and the
        updated transcript.
    """
    global rag_chain, ui_history
    # Cap on the LangChain messages kept and replayed to the model, so the
    # prompt does not grow without bound in a long-running process.
    max_history_messages = 20

    if history is None:
        history = []
    if not msg or not msg.strip():
        return "", history
    if rag_chain is None:
        history.append({
            "role": "assistant",
            "content": "β οΈ Pehle PDF upload karo aur Process karo!"
        })
        return "", history
    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": ui_history
        })
    except Exception as e:
        # UI boundary: show the failure as the assistant's reply.
        ans = f"β Error: {str(e)}"
        print(f"ERROR: {e}")
    else:
        # Only successful exchanges become model-side history.
        ui_history.append(HumanMessage(content=msg))
        ui_history.append(AIMessage(content=ans))
        del ui_history[:-max_history_messages]
    history.append({"role": "user", "content": msg})
    history.append({"role": "assistant", "content": ans})
    return "", history
def clear_fn():
    """Forget the stored conversation and return an empty transcript.

    Returns:
        An empty list, used as the new value of the chatbot component.
    """
    global ui_history
    ui_history = []
    return []
# ── CSS ───────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks: dark theme, gradient title,
# badges, and restyled inputs, buttons and example cells.
css = """
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@700;800&family=DM+Sans:wght@300;400;500&display=swap');
* { box-sizing: border-box; }
body, .gradio-container {
    font-family: 'DM Sans', sans-serif !important;
    background: #0a0a0f !important;
    color: #e8e6f0 !important;
}
.gradio-container {
    max-width: 960px !important;
    margin: 0 auto !important;
}
.app-title {
    font-family: 'Syne', sans-serif !important;
    font-size: 2.4rem !important;
    font-weight: 800 !important;
    background: linear-gradient(135deg, #a78bfa, #60a5fa, #34d399) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    padding: 32px 0 8px !important;
}
.badge {
    display: inline-flex; align-items: center; gap: 5px;
    background: rgba(139,92,246,0.1);
    border: 1px solid rgba(139,92,246,0.25);
    border-radius: 20px; padding: 4px 12px;
    font-size: 0.72rem; color: #a78bfa; font-weight: 500; margin: 3px;
}
.section-label {
    font-family: 'Syne', sans-serif !important;
    font-size: 0.7rem !important; font-weight: 700 !important;
    letter-spacing: 2.5px !important; text-transform: uppercase !important;
    color: #a78bfa !important; margin: 20px 0 12px !important;
}
textarea, input[type=text] {
    background: #0d0d14 !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 10px !important;
    color: #e8e6f0 !important;
    font-family: 'DM Sans', sans-serif !important;
    font-size: 0.9rem !important;
    transition: border-color 0.2s, box-shadow 0.2s !important;
    scrollbar-width: thin !important;
    scrollbar-color: #2d2d45 transparent !important;
}
textarea:focus, input[type=text]:focus {
    border-color: #a78bfa !important;
    box-shadow: 0 0 0 3px rgba(139,92,246,0.12) !important;
    outline: none !important;
}
textarea::-webkit-scrollbar { width: 4px !important; }
textarea::-webkit-scrollbar-thumb {
    background: #2d2d45 !important; border-radius: 10px !important;
}
textarea::-webkit-scrollbar-thumb:hover { background: #a78bfa !important; }
button.primary {
    background: linear-gradient(135deg, #7c3aed, #4f46e5) !important;
    border: none !important; border-radius: 10px !important;
    color: white !important; font-family: 'Syne', sans-serif !important;
    font-weight: 600 !important;
    box-shadow: 0 4px 15px rgba(124,58,237,0.3) !important;
    transition: all 0.2s ease !important;
}
button.primary:hover {
    transform: translateY(-1px) !important;
    box-shadow: 0 6px 20px rgba(124,58,237,0.4) !important;
}
button.secondary {
    background: #13131a !important;
    border: 1px solid #2d2d45 !important;
    border-radius: 10px !important;
    color: #9ca3af !important; transition: all 0.2s !important;
}
button.secondary:hover {
    border-color: #a78bfa !important; color: #a78bfa !important;
}
label span { color: #6b7280 !important; font-size: 0.8rem !important; }
.examples-table td, .examples td {
    background: #13131a !important;
    border: 1px solid #1f1f2e !important;
    border-radius: 8px !important; color: #9ca3af !important;
    font-size: 0.8rem !important; cursor: pointer !important;
    transition: all 0.2s !important;
}
.examples-table td:hover, .examples td:hover {
    background: #1e1e30 !important;
    color: #a78bfa !important; border-color: #a78bfa !important;
}
"""
# ── UI ────────────────────────────────────────────────────────
# Page layout and event wiring. The nesting below is reconstructed from the
# order of the calls and their scale arguments.
with gr.Blocks(
    css=css,
    title="RAG Intelligence",
    theme=gr.themes.Base(
        primary_hue="violet",
        neutral_hue="slate"
    )
) as demo:
    # Header: title, subtitle and technology badges.
    gr.HTML("""
    <div class="app-title">β‘ RAG Intelligence</div>
    <div style="text-align:center; color:#6b7280; margin-bottom:16px;">
    Multi-Document AI Β· FAISS Β· Groq LLaMA 3.3
    </div>
    <div style="text-align:center; margin-bottom:24px;">
    <span class="badge">π§ HuggingFace</span>
    <span class="badge">β‘ Groq LLM</span>
    <span class="badge">π FAISS</span>
    <span class="badge">π Multi-PDF</span>
    </div>
    """)

    # Upload section: file picker and process button next to the status.
    gr.HTML('<div class="section-label">π₯ Upload Your PDFs</div>')
    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF files select karo (multiple ho sakti hain)",
                file_types=[".pdf"],
                file_count="multiple",
            )
            process_btn = gr.Button(
                "βοΈ Process Documents",
                variant="primary"
            )
        with gr.Column(scale=2):
            status_out = gr.Markdown(
                "π **Status:** Waiting for documents..."
            )
            badge_out = gr.Markdown("**0 docs loaded**")

    gr.HTML('<hr style="border:none;border-top:1px solid #1a1a28;margin:20px 0;">')

    # Chat section: transcript, input box, and send/clear buttons.
    gr.HTML('<div class="section-label">π¬ Chat With Documents</div>')
    chatbot = gr.Chatbot(
        label="",
        height=480,
        type="messages",
        show_label=False,
        placeholder="<div style='text-align:center;color:#374151;padding:40px;'>Load documents first, then ask anything! β¦</div>",
    )
    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="β¦ Apne documents ke baare mein kuch bhi poochho...",
            label="", lines=2, max_lines=5,
            scale=5, show_label=False, container=False,
        )
        with gr.Column(scale=1, min_width=110):
            send_btn = gr.Button("Send β€", variant="primary")
            clear_btn = gr.Button("Clear π", variant="secondary")

    # Canned questions that fill the input box when clicked.
    gr.Examples(
        examples=[
            "Is document ka summary do",
            "Main topics kya hain?",
            "Important points bullet mein batao",
            "Koi definition explain karo",
            "Key concepts list karo",
        ],
        inputs=msg_box,
        label="β¦ Quick Questions",
    )
    gr.HTML("""
    <div style="text-align:center;padding:20px 0 8px;
    color:#2d2d45;font-size:0.72rem;letter-spacing:1.5px;">
    RAG INTELLIGENCE Β· FAISS Β· GROQ Β· HUGGINGFACE
    </div>
    """)

    # Events
    process_btn.click(
        fn=process_files,
        inputs=[file_input],
        outputs=[status_out, badge_out]
    )
    # The Send button and submitting the textbox run the same handler.
    send_btn.click(
        fn=chat_fn,
        inputs=[msg_box, chatbot],
        outputs=[msg_box, chatbot]
    )
    msg_box.submit(
        fn=chat_fn,
        inputs=[msg_box, chatbot],
        outputs=[msg_box, chatbot]
    )
    clear_btn.click(fn=clear_fn, outputs=[chatbot])
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()