Spaces:
Runtime error
Runtime error
quyip
committed on
Commit
·
e2ec8e0
1
Parent(s):
51a46da
fix
Browse files
- utils/summary_utils.py +12 -13
utils/summary_utils.py
CHANGED
|
@@ -5,11 +5,13 @@ from transformers import pipeline
|
|
| 5 |
|
| 6 |
from utils.tag_utils import filter_tags
|
| 7 |
|
| 8 |
-
AiSummaryVersion =
|
| 9 |
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
|
| 10 |
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
| 11 |
classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
|
| 12 |
-
|
|
|
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
def summarize(id: str, text: str):
|
|
@@ -37,8 +39,7 @@ def get_summarization(text: str):
|
|
| 37 |
try:
|
| 38 |
result = summarization_pipeline(text)
|
| 39 |
return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
|
| 40 |
-
except
|
| 41 |
-
print(e)
|
| 42 |
return None
|
| 43 |
|
| 44 |
|
|
@@ -50,8 +51,7 @@ def get_en_translation(text: str):
|
|
| 50 |
return text
|
| 51 |
result = en_translation_pipe(text)
|
| 52 |
return result[0]['translation_text'] if isinstance(result, list) else result['translation_text']
|
| 53 |
-
except
|
| 54 |
-
print(e)
|
| 55 |
return None
|
| 56 |
|
| 57 |
|
|
@@ -59,8 +59,7 @@ def is_english(text):
|
|
| 59 |
try:
|
| 60 |
lang = detect(text)
|
| 61 |
return lang == 'en'
|
| 62 |
-
except
|
| 63 |
-
print(e)
|
| 64 |
return False
|
| 65 |
|
| 66 |
|
|
@@ -68,14 +67,15 @@ def get_tags(text: str):
|
|
| 68 |
if text is None:
|
| 69 |
return []
|
| 70 |
try:
|
| 71 |
-
result =
|
|
|
|
|
|
|
| 72 |
tag_str = result[0]['generated_text'] if isinstance(result, list) else result['generated_text']
|
| 73 |
tags = re.split(r'[&,]', tag_str)
|
| 74 |
tags = [tag.strip() for tag in tags]
|
| 75 |
tags = [tag for tag in tags if len(tag) > 2 and len(tag.split(' ')) == 1]
|
| 76 |
return tags
|
| 77 |
-
except
|
| 78 |
-
print(e)
|
| 79 |
return []
|
| 80 |
|
| 81 |
|
|
@@ -88,6 +88,5 @@ def get_classification(text: str):
|
|
| 88 |
return [tag['label'].strip() for tag in result if tag['score'] > 0.75]
|
| 89 |
else:
|
| 90 |
return [result['label'].strip()] if result['score'] > 0.75 else []
|
| 91 |
-
except
|
| 92 |
-
print(e)
|
| 93 |
return []
|
|
|
|
| 5 |
|
| 6 |
from utils.tag_utils import filter_tags
|
| 7 |
|
| 8 |
+
AiSummaryVersion = 2
|
| 9 |
summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
|
| 10 |
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
|
| 11 |
classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
|
| 12 |
+
tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
|
| 13 |
+
tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
|
| 14 |
+
tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
|
| 15 |
|
| 16 |
|
| 17 |
def summarize(id: str, text: str):
|
|
|
|
| 39 |
try:
|
| 40 |
result = summarization_pipeline(text)
|
| 41 |
return result[0]['summary_text'] if isinstance(result, list) else result['summary_text']
|
| 42 |
+
except:
|
|
|
|
| 43 |
return None
|
| 44 |
|
| 45 |
|
|
|
|
| 51 |
return text
|
| 52 |
result = en_translation_pipe(text)
|
| 53 |
return result[0]['translation_text'] if isinstance(result, list) else result['translation_text']
|
| 54 |
+
except:
|
|
|
|
| 55 |
return None
|
| 56 |
|
| 57 |
|
|
|
|
| 59 |
try:
|
| 60 |
lang = detect(text)
|
| 61 |
return lang == 'en'
|
| 62 |
+
except:
|
|
|
|
| 63 |
return False
|
| 64 |
|
| 65 |
|
|
|
|
| 67 |
if text is None:
|
| 68 |
return []
|
| 69 |
try:
|
| 70 |
+
result = tag_gen_pipe_1(text)
|
| 71 |
+
print('XXXXXXXXXXXXXXXXXXXXX')
|
| 72 |
+
print(result)
|
| 73 |
tag_str = result[0]['generated_text'] if isinstance(result, list) else result['generated_text']
|
| 74 |
tags = re.split(r'[&,]', tag_str)
|
| 75 |
tags = [tag.strip() for tag in tags]
|
| 76 |
tags = [tag for tag in tags if len(tag) > 2 and len(tag.split(' ')) == 1]
|
| 77 |
return tags
|
| 78 |
+
except:
|
|
|
|
| 79 |
return []
|
| 80 |
|
| 81 |
|
|
|
|
| 88 |
return [tag['label'].strip() for tag in result if tag['score'] > 0.75]
|
| 89 |
else:
|
| 90 |
return [result['label'].strip()] if result['score'] > 0.75 else []
|
| 91 |
+
except:
|
|
|
|
| 92 |
return []
|