Spaces:
Runtime error
Runtime error
Fixed nltk bug
Browse files - summarizer.py +6 -1
summarizer.py
CHANGED
|
@@ -3,6 +3,7 @@ from transformers import BartTokenizer, TFBartForConditionalGeneration
|
|
| 3 |
from Utils import get_input_chunks
|
| 4 |
import networkx as nx
|
| 5 |
from nltk.tokenize import sent_tokenize
|
|
|
|
| 6 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 7 |
import community
|
| 8 |
from title_generator import T5Summarizer
|
|
@@ -47,7 +48,11 @@ class BARTSummarizer:
|
|
| 47 |
def preprocess_for_auto_chapters(self, text: str):
|
| 48 |
|
| 49 |
# Tokenize the text into sentences
|
| 50 |
-
sentences = sent_tokenize(text)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
# Filter out empty sentences and sentences with less than 5 words
|
| 53 |
sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]
|
|
|
|
| 3 |
from Utils import get_input_chunks
|
| 4 |
import networkx as nx
|
| 5 |
from nltk.tokenize import sent_tokenize
|
| 6 |
+
import nltk
|
| 7 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 8 |
import community
|
| 9 |
from title_generator import T5Summarizer
|
|
|
|
| 48 |
def preprocess_for_auto_chapters(self, text: str):
|
| 49 |
|
| 50 |
# Tokenize the text into sentences
|
| 51 |
+
try:
|
| 52 |
+
sentences = sent_tokenize(text)
|
| 53 |
+
except:
|
| 54 |
+
nltk.download('punkt')
|
| 55 |
+
sentences = sent_tokenize(text)
|
| 56 |
|
| 57 |
# Filter out empty sentences and sentences with less than 5 words
|
| 58 |
sentences = [sentence for sentence in sentences if len(sentence.strip()) > 0 and len(sentence.split(" ")) > 4]
|