# File size: 1,296 Bytes (header captured from the file viewer; not part of the script)
# nest_asyncio.apply() patches asyncio so an event loop that is already
# running can be re-entered.
# NOTE(review): presumably needed because AsyncChromiumLoader.load() below
# drives async browser code from synchronous top-level code -- confirm.
import nest_asyncio

nest_asyncio.apply()
# URLs of the FantasyPros articles (published under /2023/11/) to be scraped.
articles = [
    "https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
    "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
    "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
    "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
    "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/",
]
# Pipeline: scrape the pages in `articles`, convert them to plain text,
# split the text into chunks, embed the chunks into a FAISS index, and
# expose that index as `retriever`.
#
# NOTE(review): AsyncChromiumLoader, Html2TextTransformer,
# CharacterTextSplitter, FAISS and HuggingFaceEmbeddings have no visible
# import in this file; presumably they are LangChain classes -- confirm the
# imports exist before running.

# Scrape the article pages listed above.
loader = AsyncChromiumLoader(articles)
docs = loader.load()

# Convert the scraped HTML to plain text.
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the text into non-overlapping chunks.
# NOTE(review): chunk_size=100 is very small for retrieval context if it is
# measured in characters -- confirm this value is intentional.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Embed the chunks and load them into the FAISS index.
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
)

# Connect queries to the FAISS index through a similarity retriever
# configured with k=4.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)