thanhcong2001 committed on
Commit
74feb88
·
verified ·
1 Parent(s): dfdef5c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -5
app.py CHANGED
@@ -4,23 +4,35 @@ from langchain.vectorstores import FAISS
4
  from langchain.chains import ConversationalRetrievalChain
5
  from langchain.llms import HuggingFacePipeline
6
  from langchain.memory import ConversationBufferMemory
 
 
7
  import pandas as pd
8
  df = pd.read_csv('NLP.csv')
9
- corpus = df['text']
10
  #Chunking
11
  splitter = RecursiveCharacterTextSplitter(chunk_size=200,chunk_overlap = 10)
12
  texts = sum([splitter.split_text(doc) for doc in corpus], [])
13
  # Embeddings
14
- embeddings = HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2')
15
  # Indexing
16
- db = FAISS.from_texts(texts[:300],embeddings)
17
- retriever = db.as_retriever(search_kwargs={'k':2})
 
 
 
 
 
 
 
 
 
 
18
  # Model
19
  llm = HuggingFacePipeline.from_model_id(model_id='google/flan-t5-large',task='text2text-generation')
20
  # Memory
21
  memory = ConversationBufferMemory(memory_key='chat_history',return_messages=True)
22
  # Combine previous steps
23
- qa = ConversationalRetrievalChain.from_llm(llm=llm,retriever=retriever,memory=memory)
24
  def ans_ques(ques):
25
  result = qa({'question':ques})
26
  return result['answer']
 
4
  from langchain.chains import ConversationalRetrievalChain
5
  from langchain.llms import HuggingFacePipeline
6
  from langchain.memory import ConversationBufferMemory
7
+ from langchain_community.retrievers import BM25Retriever
8
+ from langchain.retrievers import EnsembleRetriever
9
  import pandas as pd
10
  df = pd.read_csv('NLP.csv')
11
+ corpus = df['text'][:300]
12
  #Chunking
13
  splitter = RecursiveCharacterTextSplitter(chunk_size=200,chunk_overlap = 10)
14
  texts = sum([splitter.split_text(doc) for doc in corpus], [])
15
  # Embeddings
16
+ embeddings = HuggingFaceEmbeddings(model_name='all-mnpnet-base-v2')
17
  # Indexing
18
+ db = FAISS.from_texts(texts,embeddings)
19
+ retriever = db.as_retriever(search_kwargs={'k':5})
20
+
21
+ # BM25
22
+ bm25 = BM25Retriever.from_texts(texts)
23
+ bm25.k =5
24
+
25
+ # Hybrid retriever: weighted ensemble of the FAISS and BM25 retrievers
26
+ hybrid_retriever = EnsembleRetriever(
27
+ retrievers = [retriever,bm25],
28
+ weights = [0.7,0.3]
29
+ )
30
  # Model
31
  llm = HuggingFacePipeline.from_model_id(model_id='google/flan-t5-large',task='text2text-generation')
32
  # Memory
33
  memory = ConversationBufferMemory(memory_key='chat_history',return_messages=True)
34
  # Combine previous steps
35
+ qa = ConversationalRetrievalChain.from_llm(llm=llm,retriever=hybrid_retriever,memory=memory)
36
  def ans_ques(ques):
37
  result = qa({'question':ques})
38
  return result['answer']