Update README.md
Browse files
README.md
CHANGED
|
@@ -152,6 +152,21 @@ print(block_rankings)
|
|
| 152 |
|
| 153 |
# [1, 0]
|
| 154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 155 |
max_context_window = 32
|
| 156 |
pruned_html = gen_embed_pruner.prune_HTML(pruned_html, block_tree, block_rankings, chat_tokenizer, max_context_window)
|
| 157 |
print(pruned_html)
|
|
|
|
| 152 |
|
| 153 |
# [1, 0]
|
| 154 |
|
| 155 |
+
block_tree, pruned_html = build_block_tree(pruned_html, max_node_words=10)
|
| 156 |
+
for block in block_tree:
|
| 157 |
+
    print("Block Content: ", block[0])
|
| 158 |
+
    print("Block Path: ", block[1])
|
| 159 |
+
    print("Is Leaf: ", block[2])
|
| 160 |
+
    print("")
|
| 161 |
+
|
| 162 |
+
# Block Content: <title>When was the bellagio in las vegas built?</title>
|
| 163 |
+
# Block Path: ['html', 'title']
|
| 164 |
+
# Is Leaf: True
|
| 165 |
+
#
|
| 166 |
+
# Block Content: <p>The Bellagio is a luxury hotel and casino located on the Las Vegas Strip in Paradise, Nevada. It was built in 1998.</p>
|
| 167 |
+
# Block Path: ['html', 'p']
|
| 168 |
+
# Is Leaf: True
|
| 169 |
+
|
| 170 |
max_context_window = 32
|
| 171 |
pruned_html = gen_embed_pruner.prune_HTML(pruned_html, block_tree, block_rankings, chat_tokenizer, max_context_window)
|
| 172 |
print(pruned_html)
|