from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationBufferMemory
import pandas as pd
import gradio as gr

# Load the corpus: one document per row in the 'text' column
df = pd.read_csv('NLP.csv')
corpus = df['text']

# Chunking: split each document into overlapping ~200-character pieces
splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=10)
texts = sum([splitter.split_text(doc) for doc in corpus], [])

# Embeddings: MiniLM sentence-transformer for dense vectors
embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')

# Indexing: build a FAISS index over the first 300 chunks, retrieve the top 2 matches per query
db = FAISS.from_texts(texts[:300], embeddings)
retriever = db.as_retriever(search_kwargs={'k': 2})

# Model: Flan-T5 served through a local HuggingFace pipeline
llm = HuggingFacePipeline.from_model_id(model_id='google/flan-t5-large', task='text2text-generation')

# Memory: keep the running chat history so follow-up questions have context
memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)

# Combine the previous steps into a conversational retrieval chain
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory)

def ans_ques(ques):
    result = qa({'question': ques})
    return result['answer']

# Simple text-in / text-out web UI around the chain
demo = gr.Interface(ans_ques, inputs='text', outputs='text')
demo.launch()
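Before (or instead of) launching the Gradio UI, the chain can be sanity-checked directly from a Python session; a minimal sketch, assuming the index and chain above have already been built, with placeholder question strings:

    # Ask two related questions; ConversationBufferMemory carries the first
    # exchange into the second turn, so the follow-up can use "it" and still resolve.
    print(ans_ques("What is tokenization?"))
    print(ans_ques("How does it differ from stemming?"))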