"""RAG chatbot over a local .txt knowledge base.

Pipeline: read data.txt -> chunk -> embed (MiniLM) -> FAISS index ->
retrieve top-k chunks per question -> generate an answer with
microsoft/phi-2 -> serve through a Gradio chat UI.
"""

import gradio as gr
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import pipeline

# -------------------------
# Load Data
# -------------------------
# Explicit encoding so the file decodes the same way on every platform.
with open("data.txt", "r", encoding="utf-8") as f:
    raw_text = f.read()

# -------------------------
# Split Text
# -------------------------
text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=50)
texts = text_splitter.split_text(raw_text)

# -------------------------
# Embeddings + Vector Store
# -------------------------
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)
vectorstore = FAISS.from_texts(texts, embeddings)

# -------------------------
# LLM (Stable HF Model)
# -------------------------
# do_sample=False means greedy (deterministic) decoding; a `temperature`
# value would be ignored in that mode and only triggers a transformers
# warning, so it is omitted.
# return_full_text=False makes the pipeline return only the newly
# generated continuation, which is more reliable than stripping the
# echoed prompt back out of the output with str.replace.
generator = pipeline(
    "text-generation",
    model="microsoft/phi-2",
    max_new_tokens=120,
    do_sample=False,
    return_full_text=False,
)


def _format_history(history):
    """Render the last 3 chat turns as "User:/Bot:" lines.

    Accepts both Gradio history formats: a list of (user, bot) pairs
    (tuples format) and a list of {"role", "content"} dicts (messages
    format), so the app works across Gradio versions.
    """
    if history and isinstance(history[0], dict):
        # messages format: pair up consecutive user/assistant entries
        pairs = []
        pending_user = None
        for msg in history:
            role = msg.get("role")
            if role == "user":
                pending_user = msg.get("content", "")
            elif role == "assistant" and pending_user is not None:
                pairs.append((pending_user, msg.get("content", "")))
                pending_user = None
        history = pairs

    return "".join(
        f"User: {user}\nBot: {bot}\n" for user, bot in history[-3:]
    )


def rag_chat(message, history):
    """Answer `message` from retrieved context plus short chat memory.

    Retrieves the 3 most similar chunks from the FAISS index, prepends
    the last 3 conversation turns, and asks the LLM to answer strictly
    from that context.  Returns a single short answer line.
    """
    # Retrieve relevant docs
    docs = vectorstore.similarity_search(message, k=3)
    context = " ".join(doc.page_content for doc in docs)

    # Build chat memory (last 3 turns)
    history_text = _format_history(history)

    # Prompt
    prompt = f"""You are a professional AI assistant.
Rules:
- Answer ONLY from the context
- If answer not found, say "I don't know"
- Keep answer short and clear

Context: {context}

Conversation:
{history_text}

Question: {message}
Answer:"""

    # With return_full_text=False the result is only the continuation,
    # so no prompt-stripping is needed.
    result = generator(prompt)[0]["generated_text"]

    # Keep just the first non-empty line; fall back to the prompt's own
    # "I don't know" convention instead of returning an empty string.
    answer = result.strip().split("\n")[0].strip()
    return answer if answer else "I don't know"


# -------------------------
# UI
# -------------------------
demo = gr.ChatInterface(
    fn=rag_chat,
    title="RAG Chatbot .txt base data Trained",
)

# -------------------------
# Run App
# -------------------------
if __name__ == "__main__":
    demo.launch()