Spaces: Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,8 @@ model_name = "roberta-base"
|
|
14   tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
16   def text_to_sentences(text):
17 -
18
19   # function to concatenate sentences into chunks of size 900 or less
20   def chunks_of_900(text, chunk_size=900):
|
|
|
|
14   tokenizer = RobertaTokenizer.from_pretrained(model_name, map_location=torch.device('cpu'))
15
16   def text_to_sentences(text):
17 +     clean_text = text.replace('\n', ' ')
18 +     return re.split(r'(?<=[^A-Z].[.?]) +(?=[A-Z])', clean_text)
19
20   # function to concatenate sentences into chunks of size 900 or less
21   def chunks_of_900(text, chunk_size=900):
|