# app.py
"""Offline retrieval-augmented chatbot: FAISS retrieval + GPT4All behind a Gradio UI.

At import time the script loads a FAISS index from ``./vectorstore`` and a
local GPT4All model, wires them into a ``RetrievalQA`` chain, and builds the
Gradio Blocks app. Running the file as a script launches the web UI.
"""
import os

from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import GPT4All
from langchain.memory import ConversationBufferMemory
import gradio as gr


# Load embeddings and FAISS vector store
def load_vectorstore():
    """Load the FAISS index stored in the ``vectorstore`` directory.

    Returns:
        The FAISS vector store, using the ``all-MiniLM-L6-v2``
        sentence-transformers model for embeddings.
    """
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    # SECURITY: load_local unpickles the stored index. The flag below opts in
    # to that; only ever point this at an index you built yourself.
    return FAISS.load_local(
        "vectorstore",
        embeddings,
        allow_dangerous_deserialization=True,
    )


db = load_vectorstore()

# Initialize GPT4All model
local_path = "./models/ggml-gpt4all-j.bin"  # Or any supported GPT4All model
callbacks = [StreamingStdOutCallbackHandler()]
llm = GPT4All(
    model=local_path,
    callbacks=callbacks,
    verbose=True,
)

# Create Retrieval QA Chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    # The number of retrieved documents is configured through search_kwargs;
    # a bare ``k=2`` keyword is not how the retriever takes its result count.
    retriever=db.as_retriever(search_kwargs={"k": 2}),
    return_source_documents=True,
)


# Define chat function
def chat(message: str, chat_history: list) -> str:
    """Answer ``message`` with the retrieval QA chain.

    Args:
        message: The user's question.
        chat_history: Accepted for interface compatibility; the chain is
            stateless and does not read it.

    Returns:
        The model's answer, followed by a "Sources:" list with the metadata
        of each retrieved document when any were returned.
    """
    # invoke() is the supported entry point; calling the chain object
    # directly is deprecated.
    result = qa_chain.invoke({"query": message})
    response = result["result"]
    sources = result.get("source_documents", [])
    if sources:
        source_lines = "\n".join(f"- {doc.metadata}" for doc in sources)
        response += "\n\nSources:\n" + source_lines
    return response


# Gradio Chat Interface
with gr.Blocks() as demo:
    gr.Markdown("## 🤖 My Offline RAG Chatbot (No API Key Needed)")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="💬 Your Message")
    clear = gr.Button("🗑️ Clear Chat")
    state = gr.State([])

    def respond(message: str, chat_history: list):
        """Handle a submitted message.

        Appends the ``(message, answer)`` pair to ``chat_history`` in place
        and returns ``("", chat_history)``: an empty string to clear the
        textbox and the updated history for the chatbot display.
        """
        # Skip the retrieval + LLM round trip for empty submissions.
        if not message or not message.strip():
            return "", chat_history
        bot_response = chat(message, chat_history)
        chat_history.append((message, bot_response))
        return "", chat_history

    msg.submit(respond, [msg, state], [msg, chatbot])
    # Reset BOTH outputs to an empty list. Resetting the state to None would
    # make the next respond() call fail on chat_history.append(...).
    clear.click(lambda: ([], []), None, [chatbot, state])


if __name__ == "__main__":
    demo.launch()