Spaces:
Runtime error
Runtime error
| import re | |
| from langdetect import detect | |
| from transformers import pipeline | |
| from utils.tag_utils import filter_tags | |
# Version marker emitted under the "ver" key of every dict returned by summarize().
AiSummaryVersion = 1

# Hugging Face pipelines are constructed once, when this module is imported,
# and reused by the helper functions in this file.
summarization_pipeline = pipeline(
    task="summarization",
    model="csebuetnlp/mT5_multilingual_XLSum",
)
en_translation_pipe = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-mul-en",
)
classification_pipe = pipeline(
    task="text-classification",
    model="Yueh-Huan/news-category-classification-distilbert",
)
tag_gen_pipe = pipeline(
    task="text2text-generation",
    model="fabiochiu/t5-base-tag-generation",
)
def summarize(id: str, text: str):
    """Build the summary record (summary text plus tags) for one text.

    Args:
        id: Identifier echoed back under the ``"id"`` key of the result.
        text: Source text to summarize and tag.

    Returns:
        ``{"ver": AiSummaryVersion}`` alone when ``text`` is ``None`` or
        shorter than 10 characters. Otherwise a dict with the keys ``"id"``,
        ``"ver"``, ``"summary"`` and ``"tags"``, where ``"tags"`` is a sorted
        list of de-duplicated tags. ``"summary"`` is ``None`` when
        ``get_summarization`` fails.
    """
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Texts of at most 100 characters are used verbatim as their own summary.
    if len(text) > 100:
        summary = get_summarization(text)
    else:
        summary = text

    # Classification and tag generation both run on the English rendering
    # of the summary.
    english_text = get_en_translation(summary)
    category_tags = get_classification(english_text)
    generated_tags = get_tags(english_text)

    # filter_tags is a project helper; its filtering rules are not visible here.
    kept_tags = filter_tags(category_tags + generated_tags)
    unique_sorted_tags = sorted(set(kept_tags))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": unique_sorted_tags,
    }
def get_summarization(text: str):
    """Summarize ``text`` with the module-level ``summarization_pipeline``.

    Returns:
        The ``'summary_text'`` entry of the first result when the pipeline
        returns a list, or of the result itself otherwise. Any exception is
        printed and ``None`` is returned instead.
    """
    try:
        output = summarization_pipeline(text)
        # Take the first element of a list result, otherwise the result itself.
        first = output[0] if isinstance(output, list) else output
        return first['summary_text']
    except Exception as err:
        # Best-effort: report the failure and fall through to None.
        print(err)
    return None
def get_en_translation(text: str):
    """Return an English rendering of ``text``.

    Text that ``is_english`` reports as English is returned unchanged;
    anything else is passed through the module-level ``en_translation_pipe``.

    Returns:
        The English text, or ``None`` when ``text`` is ``None`` or when any
        exception is raised (the exception is printed).
    """
    if text is None:
        return None
    try:
        if not is_english(text):
            output = en_translation_pipe(text)
            # Take the first element of a list result, otherwise the result itself.
            first = output[0] if isinstance(output, list) else output
            return first['translation_text']
        return text
    except Exception as err:
        # Best-effort: report the failure and fall through to None.
        print(err)
    return None
def is_english(text):
    """Report whether ``detect`` identifies ``text`` as ``'en'``.

    Returns:
        ``True`` when the detected language code equals ``'en'``, otherwise
        ``False``. If detection raises, the exception is printed and
        ``False`` is returned.
    """
    try:
        return detect(text) == 'en'
    except Exception as err:
        # Detection failures are treated as "not English".
        print(err)
    return False
def get_tags(text: str):
    """Generate single-word tags for ``text`` with ``tag_gen_pipe``.

    The pipeline's ``'generated_text'`` is split on ``&`` and ``,``; each
    piece is stripped of surrounding whitespace and kept only if it is longer
    than two characters and contains no space.

    Returns:
        The list of kept tags in generation order, or ``[]`` when ``text`` is
        ``None`` or when any exception is raised (the exception is printed).
    """
    if text is None:
        return []
    try:
        output = tag_gen_pipe(text)
        # Take the first element of a list result, otherwise the result itself.
        first = output[0] if isinstance(output, list) else output
        generated = first['generated_text']

        kept = []
        for piece in re.split(r'[&,]', generated):
            candidate = piece.strip()
            # Keep only single-word tags longer than two characters.
            if len(candidate) > 2 and ' ' not in candidate:
                kept.append(candidate)
        return kept
    except Exception as err:
        # Best-effort: report the failure and fall through to an empty list.
        print(err)
    return []
def get_classification(text: str):
    """Classify ``text`` with ``classification_pipe`` and return its labels.

    Only predictions whose ``'score'`` is strictly greater than 0.75 are
    kept; each kept ``'label'`` is stripped of surrounding whitespace.

    Returns:
        The list of kept labels, or ``[]`` when ``text`` is ``None`` or when
        any exception is raised (the exception is printed).
    """
    if text is None:
        return []
    try:
        output = classification_pipe(text)
        # Treat a single prediction like a one-element list of predictions.
        predictions = output if isinstance(output, list) else [output]
        return [
            prediction['label'].strip()
            for prediction in predictions
            if prediction['score'] > 0.75
        ]
    except Exception as err:
        # Best-effort: report the failure and fall through to an empty list.
        print(err)
    return []