# importing required libraries
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
import wikipedia
import pandas as pd
from tqdm import tqdm
# reading the names of the players from the data and displaying a few of them
players = pd.read_csv("artifacts/data.csv", encoding="latin-1")["Name"].to_list()
print(players[:5])
# extracting information about the players from their Wikipedia pages
content = ""
for player in tqdm(players, desc="Fetching Data: "):
    # auto_suggest=False stops the wikipedia package from silently resolving a name to the wrong page
    text = wikipedia.page(player, auto_suggest=False).content
    content += player.upper() + "\n" + text + "\n"
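One caveat with the loop above: a single ambiguous or missing title aborts the whole run, because wikipedia.page raises for any page it cannot resolve. A minimal defensive variant, assuming it is acceptable to simply skip such players:

# a more forgiving fetch loop: skip players whose pages cannot be resolved
content = ""
for player in tqdm(players, desc="Fetching Data: "):
    try:
        text = wikipedia.page(player, auto_suggest=False).content
    except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError):
        print(f"Skipping {player}: page missing or ambiguous")
        continue
    content += player.upper() + "\n" + text + "\n"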
# configuring the embedding function for the text chunks
model_name = "sentence-transformers/all-mpnet-base-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)
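As a quick sanity check, you can embed a single query and inspect the result; all-mpnet-base-v2 produces 768-dimensional vectors. The query text here is arbitrary and only illustrative:

# embedding a sample query to confirm the model loads and returns a 768-dim vector
sample_vector = embeddings.embed_query("a sentence about a player")
print(len(sample_vector))  # expected: 768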
# splitting the text into text chunks
text_splitter = RecursiveCharacterTextSplitter(
    separators=[".", "\n"],
    chunk_size=750,
    chunk_overlap=125,
    length_function=len
)
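The splitter tries the separators in order, so it prefers cutting at sentence boundaries, packing pieces into chunks of at most roughly 750 characters with 125 characters of overlap to preserve context across boundaries. A small illustrative check on a slice of the fetched text:

# splitting a sample of the corpus and inspecting chunk count and the largest chunk
sample_chunks = text_splitter.split_text(content[:5000])
print(len(sample_chunks), max(len(c) for c in sample_chunks))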
# storing the text chunks into the vectorstore
documents = text_splitter.split_text(content)
vectorstore = FAISS.from_texts(documents, embeddings)
# saving the FAISS vectorstore (forward slash keeps the path portable across operating systems)
vectorstore.save_local("artifacts/FAISS-Vectorstore")
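To use the persisted index later, load it back with the same embedding model and run a similarity search. A minimal sketch; note that recent LangChain releases require allow_dangerous_deserialization=True when loading a pickled FAISS index, and the query string is only illustrative:

# reloading the persisted index and retrieving the most relevant chunks for a query
vectorstore = FAISS.load_local(
    "artifacts/FAISS-Vectorstore",
    embeddings,
    allow_dangerous_deserialization=True  # required by newer LangChain versions
)
results = vectorstore.similarity_search("career statistics of the player", k=3)
for doc in results:
    print(doc.page_content[:200])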