| # A model for predicting the gender of the author of a news article | |
| ## Usage: | |
| ``` | |
| import re | |
| from transformers import pipeline | |
| from html import unescape | |
| from unicodedata import normalize | |
# Matches any run of whitespace (spaces, tabs, newlines, carriage returns, ...).
re_multispace = re.compile(r"\s+")


def normalize_text(text):
    """Clean up raw article text before it is passed to the classifier.

    Steps, in order: strip leading/trailing whitespace, collapse every run
    of whitespace into a single space, decode HTML entities, and apply
    Unicode NFKC normalization.

    Args:
        text: The raw text, or None.

    Returns:
        The normalized string, or None when ``text`` is None.
    """
    if text is None:
        return None
    # `\s+` already covers "\n", "\t" and "\r", so one substitution replaces
    # the separate per-character replacements without changing the result.
    text = re_multispace.sub(" ", text.strip())
    # NOTE(review): entities are decoded and NFKC is applied *after* the
    # whitespace collapse, so whitespace produced by these steps (e.g. from
    # "&nbsp;") is not collapsed. The order is kept as-is because it
    # presumably mirrors the preprocessing the model expects -- confirm.
    text = unescape(text)
    return normalize("NFKC", text)
# Text-classification pipeline using the "hynky/Gender" model with the
# "ufal/robeczech-base" tokenizer. Inputs are truncated to at most 512
# tokens, and `top_k=5` asks the pipeline for up to five scored labels
# per input.
model = pipeline(
    task="text-classification",
    model="hynky/Gender",
    tokenizer="ufal/robeczech-base",
    truncation=True,
    max_length=512,
    top_k=5,
)
def predict(article):
    """Normalize an article and classify it with the module-level pipeline.

    Args:
        article: Raw article text.

    Returns:
        The output of the text-classification pipeline for the normalized
        article (its label/score predictions).
    """
    article = normalize_text(article)
    # Return the predictions; previously they were computed and discarded,
    # so every call evaluated to None.
    return model(article)


# Example call; in a REPL or notebook the returned predictions are displayed.
predict("Dnes v noci bude pršet.")
| ``` |