Spaces:

jbigs
/

test

Sleeping

test / app.py

Update app.py

c2e9042 about 2 years ago

1.3 kB

	# Builds a similarity retriever over a fixed list of web articles:
	# fetch pages -> convert HTML to text -> split into chunks -> index in FAISS.
	#
	# NOTE(review): AsyncChromiumLoader, Html2TextTransformer,
	# CharacterTextSplitter, FAISS and HuggingFaceEmbeddings are used below but
	# are not imported anywhere in the visible part of this file -- confirm they
	# are brought into scope before this script runs.
	import nest_asyncio

	# Applied before any page loading happens; presumably needed so the async
	# Chromium loader can run inside an already-running event loop -- TODO confirm.
	nest_asyncio.apply()

	# Pages to fetch and index.
	articles = [
		"https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
		"https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
		"https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
		"https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
		"https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/",
	]

	# Step 1: scrape the pages listed above.
	loader = AsyncChromiumLoader(articles)
	docs = loader.load()

	# Step 2: convert the scraped HTML documents to plain text.
	html2text = Html2TextTransformer()
	docs_transformed = html2text.transform_documents(docs)

	# Step 3: split the text into chunks (chunk_size=100, no overlap).
	text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=100)
	chunked_documents = text_splitter.split_documents(docs_transformed)

	# Step 4: embed the chunks and load them into a FAISS index.
	embeddings = HuggingFaceEmbeddings(
		model_name="sentence-transformers/all-mpnet-base-v2",
	)
	db = FAISS.from_documents(chunked_documents, embeddings)

	# Step 5: expose the index through a similarity retriever
	# (k=4 -- presumably the number of chunks returned per query; verify).
	retriever = db.as_retriever(
		search_type="similarity",
		search_kwargs={"k": 4},
	)