jbigs committed on
Commit
c2e9042
·
1 Parent(s): b6476b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -0
app.py CHANGED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Build a similarity retriever over a fixed set of FantasyPros articles:
# scrape the pages, convert the HTML to plain text, split the text into
# chunks, embed the chunks into a FAISS index, and expose a retriever.
#
# NOTE(review): AsyncChromiumLoader, Html2TextTransformer,
# CharacterTextSplitter, FAISS and HuggingFaceEmbeddings are used below but
# are not imported in the visible part of this file -- confirm they are
# brought into scope before this code runs.
import nest_asyncio

# Patch asyncio so nested event loops are allowed; presumably needed because
# the loader drives its own loop inside an already-running one -- TODO confirm.
nest_asyncio.apply()

# Source articles to index.
articles = [
    "https://www.fantasypros.com/2023/11/rival-fantasy-nfl-week-10/",
    "https://www.fantasypros.com/2023/11/5-stats-to-know-before-setting-your-fantasy-lineup-week-10/",
    "https://www.fantasypros.com/2023/11/nfl-week-10-sleeper-picks-player-predictions-2023/",
    "https://www.fantasypros.com/2023/11/nfl-dfs-week-10-stacking-advice-picks-2023-fantasy-football/",
    "https://www.fantasypros.com/2023/11/players-to-buy-low-sell-high-trade-advice-2023-fantasy-football/",
]

# Fetch every article listed above.
loader = AsyncChromiumLoader(articles)
docs = loader.load()

# Turn the fetched HTML documents into plain text.
html2text = Html2TextTransformer()
docs_transformed = html2text.transform_documents(docs)

# Split the plain-text documents into chunks (chunk_size=100, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
chunked_documents = text_splitter.split_documents(docs_transformed)

# Embed the chunks and load them into a FAISS index.
db = FAISS.from_documents(
    chunked_documents,
    HuggingFaceEmbeddings(model_name='sentence-transformers/all-mpnet-base-v2'),
)

# Expose the index as a similarity retriever with k=4.
retriever = db.as_retriever(search_type="similarity", search_kwargs={'k': 4})