Spaces:
Build error
Build error
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -880,15 +880,24 @@ class SentimentAnalyzer:
|
|
| 880 |
# Clean and prepare text
|
| 881 |
text = text.replace('\n', ' ').strip()
|
| 882 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 883 |
# Split text into chunks if it's too long
|
| 884 |
chunks = self._split_text(text)
|
| 885 |
|
| 886 |
summaries = []
|
| 887 |
for chunk in chunks:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
# Generate summary for each chunk
|
| 889 |
summary = self.summarizer(chunk,
|
| 890 |
-
max_length=
|
| 891 |
-
min_length=
|
| 892 |
do_sample=False)[0]['summary_text']
|
| 893 |
summaries.append(summary)
|
| 894 |
|
|
@@ -1051,15 +1060,24 @@ class TextSummarizer:
|
|
| 1051 |
# Clean and prepare text
|
| 1052 |
text = text.replace('\n', ' ').strip()
|
| 1053 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
# Split text into chunks if it's too long
|
| 1055 |
chunks = self._split_text(text)
|
| 1056 |
|
| 1057 |
summaries = []
|
| 1058 |
for chunk in chunks:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1059 |
# Generate summary for each chunk
|
| 1060 |
summary = self.summarizer(chunk,
|
| 1061 |
-
max_length=
|
| 1062 |
-
min_length=
|
| 1063 |
do_sample=False)[0]['summary_text']
|
| 1064 |
summaries.append(summary)
|
| 1065 |
|
|
|
|
| 880 |
# Clean and prepare text
|
| 881 |
text = text.replace('\n', ' ').strip()
|
| 882 |
|
| 883 |
+
# For very short texts, return as is
|
| 884 |
+
if len(text.split()) < 30:
|
| 885 |
+
return text
|
| 886 |
+
|
| 887 |
# Split text into chunks if it's too long
|
| 888 |
chunks = self._split_text(text)
|
| 889 |
|
| 890 |
summaries = []
|
| 891 |
for chunk in chunks:
|
| 892 |
+
# Calculate appropriate max_length based on input length
|
| 893 |
+
input_words = len(chunk.split())
|
| 894 |
+
max_length = min(130, max(30, input_words // 2))
|
| 895 |
+
min_length = min(30, max(10, input_words // 4))
|
| 896 |
+
|
| 897 |
# Generate summary for each chunk
|
| 898 |
summary = self.summarizer(chunk,
|
| 899 |
+
max_length=max_length,
|
| 900 |
+
min_length=min_length,
|
| 901 |
do_sample=False)[0]['summary_text']
|
| 902 |
summaries.append(summary)
|
| 903 |
|
|
|
|
| 1060 |
# Clean and prepare text
|
| 1061 |
text = text.replace('\n', ' ').strip()
|
| 1062 |
|
| 1063 |
+
# For very short texts, return as is
|
| 1064 |
+
if len(text.split()) < 30:
|
| 1065 |
+
return text
|
| 1066 |
+
|
| 1067 |
# Split text into chunks if it's too long
|
| 1068 |
chunks = self._split_text(text)
|
| 1069 |
|
| 1070 |
summaries = []
|
| 1071 |
for chunk in chunks:
|
| 1072 |
+
# Calculate appropriate max_length based on input length
|
| 1073 |
+
input_words = len(chunk.split())
|
| 1074 |
+
max_length = min(130, max(30, input_words // 2))
|
| 1075 |
+
min_length = min(30, max(10, input_words // 4))
|
| 1076 |
+
|
| 1077 |
# Generate summary for each chunk
|
| 1078 |
summary = self.summarizer(chunk,
|
| 1079 |
+
max_length=max_length,
|
| 1080 |
+
min_length=min_length,
|
| 1081 |
do_sample=False)[0]['summary_text']
|
| 1082 |
summaries.append(summary)
|
| 1083 |
|