Update app.py
Browse files
app.py
CHANGED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import nest_asyncio

# Patch asyncio so an event loop can be re-entered. Presumably required
# because the loader's load() call further down drives async code from an
# environment that may already be running a loop -- TODO confirm.
nest_asyncio.apply()
|
| 3 |
+
|
| 4 |
+
# URLs of the articles to scrape and index.
articles = [
    "https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
    "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
    "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
    "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
    "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/",
]
|
| 9 |
+
|
| 10 |
+
# Scrape the pages listed in `articles`; `docs` holds whatever the loader
# returns for them.
# NOTE(review): AsyncChromiumLoader is not imported in the visible lines --
# confirm the import exists (presumably from LangChain's document loaders).
loader = AsyncChromiumLoader(articles)
docs = loader.load()
|
| 13 |
+
|
| 14 |
+
# Convert the scraped HTML documents to plain text.
# NOTE(review): Html2TextTransformer is not imported in the visible lines --
# confirm the import exists (presumably from LangChain's document
# transformers).
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)
|
| 17 |
+
|
| 18 |
+
# Split the plain-text documents into non-overlapping chunks.
# NOTE(review): chunk_size=100 looks small for article text -- confirm it is
# intended. CharacterTextSplitter is also not imported in the visible lines.
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)
|
| 22 |
+
|
| 23 |
+
# Embed the chunks with the all-mpnet-base-v2 sentence-transformers model
# and load them into a FAISS index.
# NOTE(review): FAISS and HuggingFaceEmbeddings are not imported in the
# visible lines -- confirm the imports exist.
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Expose the FAISS index as a retriever: similarity search with k=4
# (presumably the number of chunks returned per query -- confirm against
# the vector-store documentation).
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={'k': 4},
)
|