# importing required libraries
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import wikipedia
import pandas as pd
from tqdm import tqdm
# reading the names of the players from the data and displaying a few of them
players = pd.read_csv("artifacts/data.csv", encoding="latin-1")["Name"].to_list()
print(players[:5])
# extracting information about the players from their Wikipedia pages
content = ""
for player in tqdm(players, desc="Fetching Data: "):
    # auto_suggest=False stops the wikipedia package from silently resolving a name to the wrong page
    text = wikipedia.page(player, auto_suggest=False).content
    content += player.upper() + "\n" + text + "\n"
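One caveat with the loop above: a single ambiguous or missing title aborts the whole run, because wikipedia.page raises for any page it cannot resolve. A minimal defensive variant, assuming it is acceptable to simply skip such players:

# a more forgiving fetch loop: skip players whose pages cannot be resolved
content = ""
for player in tqdm(players, desc="Fetching Data: "):
    try:
        text = wikipedia.page(player, auto_suggest=False).content
    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError):
        print(f"Skipping {player}: page missing or ambiguous")
        continue
    content += player.upper() + "\n" + text + "\n"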
# configuring the embedding function for the text chunks
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
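As a quick sanity check, you can embed a single query and inspect the result; all-mpnet-base-v2 produces 768-dimensional vectors. The query text here is arbitrary and only illustrative:

# embedding a sample query to confirm the model loads and returns a 768-dim vector
sample_vector = embeddings.embed_query("a sentence about a player")
print(len(sample_vector))  # expected: 768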
# splitting the text into text chunks
text_splitter = RecursiveCharacterTextSplitter(
    separators=[".", "\n"],
    chunk_size=750,
    chunk_overlap=125,
    length_function=len
)
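The splitter tries the separators in order, so it prefers cutting at sentence boundaries, packing pieces into chunks of at most roughly 750 characters with 125 characters of overlap to preserve context across boundaries. A small illustrative check on a slice of the fetched text:

# splitting a sample of the corpus and inspecting chunk count and the largest chunk
sample_chunks = text_splitter.split_text(content[:5000])
print(len(sample_chunks), max(len(c) for c in sample_chunks))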
# storing the text chunks into the vectorstore
documents = text_splitter.split_text(content)
vectorstore = FAISS.from_texts(documents, embeddings)
# saving the FAISS vectorstore (forward slash keeps the path portable across operating systems)
vectorstore.save_local("artifacts/FAISS-Vectorstore")
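To use the persisted index later, load it back with the same embedding model and run a similarity search. A minimal sketch; note that recent LangChain releases require allow_dangerous_deserialization=True when loading a pickled FAISS index, and the query string is only illustrative:

# reloading the persisted index and retrieving the most relevant chunks for a query
vectorstore = FAISS.load_local(
    "artifacts/FAISS-Vectorstore",
    embeddings,
    allow_dangerous_deserialization=True  # required by newer LangChain versions
)
results = vectorstore.similarity_search("career statistics of the player", k=3)
for doc in results:
    print(doc.page_content[:200])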