Spaces:
Sleeping
Sleeping
| # ============================================================ | |
# RAG Chatbot — Hugging Face Spaces Deployment
| # app.py | |
| # ============================================================ | |
| import os, warnings | |
| warnings.filterwarnings("ignore") | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain_community.document_loaders import PyPDFLoader, TextLoader | |
| from langchain_community.vectorstores import FAISS | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| from langchain_groq import ChatGroq | |
| from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder | |
| from langchain_core.messages import HumanMessage, AIMessage | |
| from langchain_core.output_parsers import StrOutputParser | |
| from langchain_core.runnables import RunnablePassthrough, RunnableLambda | |
| import gradio as gr | |
# API key — read automatically from the Hugging Face Spaces secrets (environment variable)
| GROQ_API_KEY = os.environ.get("GROQ_API_KEY") | |
# ── Document Load Function ────────────────────────────────────
def load_docs(folder="uploaded_docs"):
    """Load every supported document found directly inside *folder*.

    PDF files are read with ``PyPDFLoader`` and text files with
    ``TextLoader`` (UTF-8). Files with any other extension are ignored.
    A file that fails to load is reported on stdout and skipped, so one
    bad upload does not prevent the others from being indexed.

    Args:
        folder: Directory to scan. It is created (and an empty list is
            returned) when it does not exist yet.

    Returns:
        A flat list with the documents produced by the loaders, in
        alphabetical file-name order.
    """
    all_docs = []
    if not os.path.exists(folder):
        os.makedirs(folder)
        return all_docs

    # Sorted so the resulting document (and chunk) order is reproducible;
    # os.listdir() returns entries in arbitrary order.
    for fname in sorted(os.listdir(folder)):
        fpath = os.path.join(folder, fname)
        # Compare extensions case-insensitively so "REPORT.PDF" is not
        # silently skipped.
        lowered = fname.lower()
        try:
            if lowered.endswith(".pdf"):
                docs = PyPDFLoader(fpath).load()
                all_docs.extend(docs)
                print(f" β PDF: {fname} β {len(docs)} pages")
            elif lowered.endswith(".txt"):
                docs = TextLoader(fpath, encoding="utf-8").load()
                all_docs.extend(docs)
                print(f" β TXT: {fname}")
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f" β {fname}: {e}")
    return all_docs
# ── RAG Build Function ────────────────────────────────────────
def build_rag(docs):
    """Build the retrieval-augmented generation chain for *docs*.

    Steps: split the documents into overlapping chunks, embed them with
    ``all-MiniLM-L6-v2`` on CPU, index them in an in-memory FAISS store,
    and compose a chain that retrieves the 3 best chunks for a question
    and sends them, together with the chat history, to the Groq LLM.

    Args:
        docs: Documents to index (as returned by the document loaders).

    Returns:
        A runnable chain. Invoke it with a dict holding ``"question"``
        (str) and ``"chat_history"`` (list of chat messages); it returns
        the answer as a string.

    Raises:
        ValueError: If no Groq API key is available, or if the documents
            contain no text to index.
    """
    # Fail fast, before the slow embedding-model load, when the key is missing.
    api_key = GROQ_API_KEY or os.environ.get("GROQ_API_KEY")
    if not api_key:
        raise ValueError(
            "GROQ_API_KEY is not set; add it to the Space secrets or the environment."
        )

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=80,
        separators=["\n\n", "\n", ". ", " ", ""],
    )
    chunks = splitter.split_documents(docs)
    # An empty chunk list (e.g. scanned PDFs without a text layer) would make
    # FAISS.from_documents fail with an opaque IndexError, so reject it here.
    if not chunks:
        raise ValueError("No text could be extracted from the documents; nothing to index.")
    print(f"β {len(chunks)} chunks")

    print("βοΈ Loading embedding model...")
    emb = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    vs = FAISS.from_documents(chunks, emb)
    print("β FAISS ready")

    llm = ChatGroq(
        groq_api_key=api_key,
        model_name="llama-3.3-70b-versatile",
        temperature=0.3,
        max_tokens=1024,
    )
    print("β Groq connected")

    retriever = vs.as_retriever(search_kwargs={"k": 3})
    prompt = ChatPromptTemplate.from_messages([
        ("system", """You are a helpful AI assistant.
Answer questions using ONLY the context provided below.
If the answer is not in the context, say:
I don't have that information in the provided documents.
Context: {context}"""),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ])

    def fmt(docs):
        """Join the retrieved chunks into one numbered context string."""
        return "\n\n---\n\n".join(
            f"[Chunk {i+1}]:\n{d.page_content}"
            for i, d in enumerate(docs)
        )

    def retrieve_context(inputs):
        """Fetch and format the chunks relevant to the incoming question."""
        return fmt(retriever.invoke(inputs["question"]))

    # The incoming dict is passed through unchanged with "context" added,
    # then rendered by the prompt, answered by the LLM and reduced to a str.
    chain = (
        RunnablePassthrough.assign(context=RunnableLambda(retrieve_context))
        | prompt
        | llm
        | StrOutputParser()
    )
    return chain
# ── Global State ──────────────────────────────────────────────
# Module-level state shared by the Gradio callbacks below.
# Retrieval chain produced by build_rag(); None until documents are processed.
rag_chain = None
# Conversation history as a list of chat messages; emptied by clear_fn().
ui_history: list = []
# ── Gradio Functions ──────────────────────────────────────────
def process_files(files):
    """Store the uploaded files, index them and report the outcome.

    Previous uploads in ``uploaded_docs`` are removed, the new files are
    copied there under their base names, the documents are loaded and a
    fresh RAG chain is built and stored in the module-level ``rag_chain``.

    Args:
        files: Uploads from the Gradio ``File`` component. Each item may be
            an object exposing the temporary path as ``.name`` or a plain
            path string. ``None`` or an empty list means nothing was chosen.

    Returns:
        A status message for the UI. Failures are reported through this
        message rather than raised.
    """
    global rag_chain
    if not files:
        return "β οΈ Koi file select nahi ki!"

    import shutil  # local import: only this function needs it

    upload_dir = "uploaded_docs"
    names = []
    try:
        os.makedirs(upload_dir, exist_ok=True)
        # Drop earlier uploads so the index only reflects the current files.
        # Non-file entries are left alone; os.remove() would raise on them.
        for stale in os.listdir(upload_dir):
            stale_path = os.path.join(upload_dir, stale)
            if os.path.isfile(stale_path) or os.path.islink(stale_path):
                os.remove(stale_path)

        for file in files:
            # Accept both file-like wrappers (path in .name) and bare path strings.
            src = getattr(file, "name", file)
            name = os.path.basename(src)
            # Streamed copy: avoids reading a large PDF fully into memory.
            shutil.copyfile(src, os.path.join(upload_dir, name))
            names.append(name)
    except OSError as e:
        # Report I/O problems in the status box like every other failure.
        return f"β Error: {str(e)}"

    docs = load_docs(upload_dir)
    if not docs:
        return "β Documents load nahi hue!"
    try:
        rag_chain = build_rag(docs)
        chars = sum(len(d.page_content) for d in docs)
        return f"β Ready! Files: {', '.join(names)} | Pages: {len(docs)} | Characters: {chars:,}\n\nπ¬ Ab neeche sawal poochho!"
    except Exception as e:
        return f"β Error: {str(e)}"
# Prefix of the answer shown when the chain raises; such turns are kept out
# of the history that is sent back to the model.
_ERROR_PREFIX = "β Error:"


def _history_to_messages(history):
    """Convert Gradio ``(user, bot)`` pairs into LangChain chat messages.

    Pairs without user text (UI notices), pairs holding non-text content and
    failed turns are skipped, so only real question/answer exchanges are used.
    """
    messages = []
    for pair in history:
        if not isinstance(pair, (list, tuple)) or len(pair) != 2:
            continue
        user_text, bot_text = pair
        if not (isinstance(user_text, str) and isinstance(bot_text, str)):
            continue
        if not user_text.strip() or bot_text.startswith(_ERROR_PREFIX):
            continue
        messages.append(HumanMessage(content=user_text))
        messages.append(AIMessage(content=bot_text))
    return messages


def chat_fn(msg, history):
    """Answer *msg* with the RAG chain and append the turn to the chat.

    The conversation context given to the model is derived from *history*,
    i.e. the chat shown in the caller's own session, instead of a
    process-wide list. This keeps one visitor's conversation out of
    another visitor's prompts and keeps the model's memory in step with
    what is displayed.

    Args:
        msg: Text typed by the user; ``None`` or blank input is ignored.
        history: Current chat as a list of ``(user, bot)`` pairs, or ``None``.

    Returns:
        A tuple ``("", updated_history)``: the empty string clears the input
        box and the list is the new chat content.
    """
    history = list(history or [])
    if not msg or not msg.strip():
        return "", history
    if rag_chain is None:
        return "", history + [("", "β οΈ Pehle PDF upload karo aur Process karo!")]
    try:
        ans = rag_chain.invoke({
            "question": msg,
            "chat_history": _history_to_messages(history),
        })
    except Exception as e:
        # Surface the failure in the chat instead of breaking the UI callback.
        ans = f"{_ERROR_PREFIX} {str(e)}"
        print(f"ERROR: {e}")
    return "", history + [(msg, ans)]
def clear_fn():
    """Forget the stored conversation and hand back an empty chat.

    Returns:
        A new empty list, used by the caller as the cleared chat content.
    """
    global ui_history
    ui_history = list()
    return list()
# ── Gradio UI ─────────────────────────────────────────────────
# Assemble the single-page interface and wire its events to the callbacks.
# NOTE(review): the Row/Column nesting was reconstructed from a listing whose
# indentation was lost — confirm the grouping against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft(), title="RAG Chatbot") as demo:
    # Page header.
    gr.Markdown("""
# π€ RAG Chatbot β PDF Support
**Stack:** FAISS Β· HuggingFace Embeddings Β· Groq LLaMA 3.3
> Apni PDF upload karo aur sawal poochho!
""")

    # Step 1: upload widgets next to a status panel.
    gr.Markdown("### π€ Step 1: PDF Upload Karo")
    with gr.Row():
        with gr.Column(scale=3):
            file_input = gr.File(
                label="PDF / TXT files select karo",
                file_types=[".pdf", ".txt"],
                file_count="multiple",
            )
            process_btn = gr.Button("βοΈ Process Documents", variant="primary")
        with gr.Column(scale=2):
            status_box = gr.Markdown("π Status: Waiting for upload...")

    # Step 2: chat window, question box and controls.
    gr.Markdown("---\n### π¬ Step 2: Sawal Poochho")
    chatbot = gr.Chatbot(label="Chat", height=450)
    with gr.Row():
        msg_box = gr.Textbox(
            placeholder="PDF ke baare mein sawal poochho...",
            label="Sawal",
            scale=5,
        )
        send_btn = gr.Button("Send π", variant="primary", scale=1)
    clear_btn = gr.Button("ποΈ Clear Chat", variant="secondary")

    # Clickable sample questions that fill the question box.
    example_questions = [
        "Is document ka summary do",
        "Main topic kya hai?",
        "Important points batao",
        "Koi specific cheez explain karo",
    ]
    gr.Examples(
        examples=example_questions,
        inputs=msg_box,
        label="π‘ Example Sawaal:",
    )
    gr.Markdown("---\n㪠**Tip:** PDF process hone ke baad sawal poochho. Clear se chat reset hoga.")

    # Event wiring.
    process_btn.click(fn=process_files, inputs=[file_input], outputs=[status_box])
    # The Send button and pressing Enter in the textbox trigger the same handler.
    for submit_event in (send_btn.click, msg_box.submit):
        submit_event(fn=chat_fn, inputs=[msg_box, chatbot], outputs=[msg_box, chatbot])
    clear_btn.click(fn=clear_fn, outputs=[chatbot])

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()