Update README.md
Browse files
README.md
CHANGED
|
@@ -55,17 +55,17 @@ text='''In the heart of the bustling city, where towering skyscrapers touch the
|
|
| 55 |
With each passing light year, the anticipation of unraveling secrets that could alter humanity's
|
| 56 |
understanding of life in the universe grew ever stronger.'''
|
| 57 |
|
| 58 |
-
# chunk the text. The
|
| 59 |
-
chunks=model.chunk_text(text, tokenizer,
|
| 60 |
|
| 61 |
# print chunks
|
| 62 |
for i, c in enumerate(chunks):
|
| 63 |
print(f'-----chunk: {i}------------')
|
| 64 |
print(c)
|
| 65 |
|
| 66 |
-
# chunk the text faster, by using a fixed context window, batchsize is the number of windows run per batch.
|
| 67 |
print('----->Here is the result of fast chunk method<------:')
|
| 68 |
-
chunks=model.chunk_text_fast(text, tokenizer, batchsize=20,
|
| 69 |
|
| 70 |
# print chunks
|
| 71 |
for i, c in enumerate(chunks):
|
|
|
|
| 55 |
With each passing light year, the anticipation of unraveling secrets that could alter humanity's
|
| 56 |
understanding of life in the universe grew ever stronger.'''
|
| 57 |
|
| 58 |
+
# Chunk the text. prob_threshold must lie in the open interval (0, 1); lower values produce more chunks.
|
| 59 |
+
chunks=model.chunk_text(text, tokenizer, prob_threshold=0.5)
|
| 60 |
|
| 61 |
# print chunks
|
| 62 |
for i, c in enumerate(chunks):
|
| 63 |
print(f'-----chunk: {i}------------')
|
| 64 |
print(c)
|
| 65 |
|
| 66 |
+
# Chunk the text faster, at a significant cost in chunking quality, by using a fixed context window; batchsize is the number of windows processed per batch.
|
| 67 |
print('----->Here is the result of fast chunk method<------:')
|
| 68 |
+
chunks=model.chunk_text_fast(text, tokenizer, batchsize=20, prob_threshold=0.5)
|
| 69 |
|
| 70 |
# print chunks
|
| 71 |
for i, c in enumerate(chunks):
|