Upload folder using huggingface_hub
Browse files
Co-authored-by: Cursor <cursoragent@cursor.com>
- rnnlm_model/__pycache__/__init__.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/__init__.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/__init__.cpython-38.pyc +0 -0
- rnnlm_model/__pycache__/configuration_rnnlm.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/configuration_rnnlm.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/configuration_rnnlm.cpython-38.pyc +0 -0
- rnnlm_model/__pycache__/modeling_rnnlm.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/modeling_rnnlm.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/modeling_rnnlm.cpython-38.pyc +0 -0
- rnnlm_model/__pycache__/pipeline_rnnlm.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/pipeline_rnnlm.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/pipeline_rnnlm.cpython-38.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_rnnlm.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_rnnlm.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_rnnlm.cpython-38.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_utils.cpython-311.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_utils.cpython-312.pyc +0 -0
- rnnlm_model/__pycache__/tokenization_utils.cpython-38.pyc +0 -0
- rnnlm_model/tokenization_utils.py +10 -1
rnnlm_model/__pycache__/__init__.cpython-311.pyc
DELETED
|
Binary file (576 Bytes)
|
|
|
rnnlm_model/__pycache__/__init__.cpython-312.pyc
DELETED
|
Binary file (530 Bytes)
|
|
|
rnnlm_model/__pycache__/__init__.cpython-38.pyc
DELETED
|
Binary file (508 Bytes)
|
|
|
rnnlm_model/__pycache__/configuration_rnnlm.cpython-311.pyc
DELETED
|
Binary file (2.11 kB)
|
|
|
rnnlm_model/__pycache__/configuration_rnnlm.cpython-312.pyc
DELETED
|
Binary file (1.87 kB)
|
|
|
rnnlm_model/__pycache__/configuration_rnnlm.cpython-38.pyc
DELETED
|
Binary file (1.46 kB)
|
|
|
rnnlm_model/__pycache__/modeling_rnnlm.cpython-311.pyc
DELETED
|
Binary file (17.4 kB)
|
|
|
rnnlm_model/__pycache__/modeling_rnnlm.cpython-312.pyc
DELETED
|
Binary file (16.6 kB)
|
|
|
rnnlm_model/__pycache__/modeling_rnnlm.cpython-38.pyc
DELETED
|
Binary file (9.25 kB)
|
|
|
rnnlm_model/__pycache__/pipeline_rnnlm.cpython-311.pyc
DELETED
|
Binary file (6.17 kB)
|
|
|
rnnlm_model/__pycache__/pipeline_rnnlm.cpython-312.pyc
DELETED
|
Binary file (5.38 kB)
|
|
|
rnnlm_model/__pycache__/pipeline_rnnlm.cpython-38.pyc
DELETED
|
Binary file (3.38 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_rnnlm.cpython-311.pyc
DELETED
|
Binary file (17.4 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_rnnlm.cpython-312.pyc
DELETED
|
Binary file (15.3 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_rnnlm.cpython-38.pyc
DELETED
|
Binary file (9.78 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_utils.cpython-311.pyc
DELETED
|
Binary file (24.6 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_utils.cpython-312.pyc
DELETED
|
Binary file (18.1 kB)
|
|
|
rnnlm_model/__pycache__/tokenization_utils.cpython-38.pyc
DELETED
|
Binary file (11.8 kB)
|
|
|
rnnlm_model/tokenization_utils.py
CHANGED
|
@@ -351,7 +351,16 @@ def filter_gen_seq(encoder, seq, n_sents=1, eos_tokens=[]):
|
|
| 351 |
else:
|
| 352 |
seq = getattr(doc, 'text', getattr(doc, 'string', str(doc)))
|
| 353 |
else:
|
| 354 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
if leading_space and seq:
|
| 356 |
seq = " " + seq.lstrip()
|
| 357 |
return seq
|
|
|
|
| 351 |
else:
|
| 352 |
seq = getattr(doc, 'text', getattr(doc, 'string', str(doc)))
|
| 353 |
else:
|
| 354 |
+
sentences = segment(encoder, seq)
|
| 355 |
+
n = n_sents
|
| 356 |
+
seq = ""
|
| 357 |
+
while n <= len(sentences):
|
| 358 |
+
seq = " ".join(sentences[:n]).strip()
|
| 359 |
+
if seq:
|
| 360 |
+
break
|
| 361 |
+
n += 1
|
| 362 |
+
if not seq and sentences:
|
| 363 |
+
seq = " ".join(sentences).strip()
|
| 364 |
if leading_space and seq:
|
| 365 |
seq = " " + seq.lstrip()
|
| 366 |
return seq
|