tim1900 committed on
Commit
e36c2ab
·
verified ·
1 Parent(s): 9dc6b56

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -2
README.md CHANGED
@@ -275,7 +275,7 @@ def chunk_text_with_max_chunk_size(model, text, tokenizer, prob_threshold=0.5,ma
275
  unchunk_tokens_this_window = greater_rows_indices[0] if greater_rows_indices[0]!=0 else greater_rows_indices[1]#exclude the fist index
276
 
277
  # manually chunk
278
- if unchunk_tokens + unchunk_tokens_this_window > max_tokens_per_chunk:
279
  big_windows_end = max_tokens_per_chunk - unchunk_tokens
280
  max_value, max_index= logit_diff[:,1:big_windows_end].max(), logit_diff[:,1:big_windows_end].argmax() + 1
281
  if best_logits < max_value:
@@ -316,7 +316,7 @@ def chunk_text_with_max_chunk_size(model, text, tokenizer, prob_threshold=0.5,ma
316
  unchunk_tokens_this_window = min(windows_start+STEP,input_ids.shape[1]) - windows_start
317
 
318
  # manually chunk
319
- if unchunk_tokens + unchunk_tokens_this_window > max_tokens_per_chunk:
320
  big_windows_end = max_tokens_per_chunk - unchunk_tokens
321
  if logit_diff.shape[1] > 1:
322
 
 
275
  unchunk_tokens_this_window = greater_rows_indices[0] if greater_rows_indices[0]!=0 else greater_rows_indices[1]#exclude the fist index
276
 
277
  # manually chunk
278
+ if unchunk_tokens + unchunk_tokens_this_window > max_tokens_per_chunk: #change ">" to ">=" if buggy for the moment
279
  big_windows_end = max_tokens_per_chunk - unchunk_tokens
280
  max_value, max_index= logit_diff[:,1:big_windows_end].max(), logit_diff[:,1:big_windows_end].argmax() + 1
281
  if best_logits < max_value:
 
316
  unchunk_tokens_this_window = min(windows_start+STEP,input_ids.shape[1]) - windows_start
317
 
318
  # manually chunk
319
+ if unchunk_tokens + unchunk_tokens_this_window > max_tokens_per_chunk: #change ">" to ">=" if buggy for the moment
320
  big_windows_end = max_tokens_per_chunk - unchunk_tokens
321
  if logit_diff.shape[1] > 1:
322