tim1900 committed on
Commit
e4ff9ce
·
verified ·
1 Parent(s): 2753894

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +4 -4
README.md CHANGED
@@ -55,17 +55,17 @@ text='''In the heart of the bustling city, where towering skyscrapers touch the
55
  With each passing light year, the anticipation of unraveling secrets that could alter humanity's
56
  understanding of life in the universe grew ever stronger.'''
57
 
58
- # chunk the text. The threshold can be (-inf, +inf). The lower threshold is, the more chunks will be generated.
59
- chunks=model.chunk_text(text, tokenizer, threshold=0)
60
 
61
  # print chunks
62
  for i, c in enumerate(chunks):
63
  print(f'-----chunk: {i}------------')
64
  print(c)
65
 
66
- # chunk the text faster, by using a fixed context window, batchsize is the number of windows run per batch.
67
  print('----->Here is the result of fast chunk method<------:')
68
- chunks=model.chunk_text_fast(text, tokenizer, batchsize=20, threshold=0)
69
 
70
  # print chunks
71
  for i, c in enumerate(chunks):
 
55
  With each passing light year, the anticipation of unraveling secrets that could alter humanity's
56
  understanding of life in the universe grew ever stronger.'''
57
 
58
+ # chunk the text. The prob_threshold should be between 0 and 1. The lower it is, the more chunks will be generated.
59
+ chunks=model.chunk_text(text, tokenizer, prob_threshold=0.5)
60
 
61
  # print chunks
62
  for i, c in enumerate(chunks):
63
  print(f'-----chunk: {i}------------')
64
  print(c)
65
 
66
+ # chunk the text faster — at a significant cost in chunking quality — by using a fixed context window; batchsize is the number of windows run per batch.
67
  print('----->Here is the result of fast chunk method<------:')
68
+ chunks=model.chunk_text_fast(text, tokenizer, batchsize=20, prob_threshold=0.5)
69
 
70
  # print chunks
71
  for i, c in enumerate(chunks):