| import re |
|
|
| |
# Matches one Spanish word: a maximal run of ASCII letters plus the accented
# vowels, "ü" and "ñ" in either case. Digits, punctuation and hyphens are not
# part of the character class, so they act as token separators.
WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE)
|
|
def _tokenize_words_es(text: str) -> list[str]:
    """Return every word found in *text*, in order of appearance.

    A word is a maximal match of ``WORD_RE``; any character the pattern does
    not match separates tokens and is dropped from the result.
    """
    return WORD_RE.findall(text)
|
|
| def _count_sentences_es(text: str) -> int: |
| |
| sentences = re.split(r"[.!?…]+|[¡¿]", text) |
| return max(1, sum(1 for s in sentences if s.strip())) |
|
|
| |
# Optional dependency: use pyphen's Spanish dictionary when it can be loaded.
# Any failure here (package missing, dictionary unavailable, ...) is
# deliberately swallowed so the rule-based counter below is used instead.
try:
    import pyphen
    _dic = pyphen.Pyphen(lang='es')
except Exception:
    _dic = None

# A word-final "y" is treated as the vowel "i" (e.g. "rey" -> "rei").
_FINAL_Y_RE = re.compile(r"y$")
# The "u" in que/qui/gue/gui is dropped so it is not counted as a vowel.
# "ü" (as in "pingüino") is a different character and is left untouched.
_SILENT_U_RE = re.compile(r"([qg])u(?=[ei])")
# A maximal run of adjacent vowels; each run holds one or more nuclei.
_VOWEL_RUN_RE = re.compile(r"[aeiouáéíóúü]+")
# Vowels that form their own nucleus when adjacent to another of this set;
# accented "í"/"ú" are included so they split from a neighbouring vowel.
_STRONG_VOWELS = frozenset("aáeéoóíú")


def _count_syllables_rules_es(word: str) -> int:
    """Count syllables in *word* with vowel-group rules (no dictionary).

    The word is lower-cased, a final "y" becomes "i", and the "u" of
    que/qui/gue/gui is removed. Each run of adjacent vowels then contributes
    one syllable, plus one more for every adjacent pair in which both vowels
    are in ``_STRONG_VOWELS``. The result is never less than 1.
    """
    w = _FINAL_Y_RE.sub("i", word.lower())
    w = _SILENT_U_RE.sub(r"\1", w)
    total = 0
    for run in _VOWEL_RUN_RE.findall(w):
        # One nucleus per run, plus one per strong-strong boundary inside it.
        total += 1 + sum(
            a in _STRONG_VOWELS and b in _STRONG_VOWELS
            for a, b in zip(run, run[1:])
        )
    return max(1, total)


def count_syllables_es(word: str) -> int:
    """Return the number of syllables in the Spanish word *word* (at least 1).

    When the pyphen Spanish dictionary was loaded at import time, the count
    is the number of hyphens in pyphen's hyphenated form plus one. Otherwise
    the rule-based counter ``_count_syllables_rules_es`` is used.

    NOTE(review): pyphen reports hyphenation points, which presumably can be
    fewer than true syllable boundaries - confirm that both backends agree
    closely enough for scores to be comparable across environments.
    """
    if _dic is not None:
        hyphenated = _dic.inserted(word)
        return max(1, hyphenated.count('-') + 1)
    return _count_syllables_rules_es(word)
|
|
| |
def fernandez_huerta(text: str) -> float | None:
    """
    Compute the Fernández–Huerta readability score for Spanish text.

    Higher = easier. Typical range ~0–100.

    The score is ``206.84 - 0.60 * P - 1.02 * F`` where ``P`` is the number
    of syllables per 100 words and ``F`` is the average number of words per
    sentence.

    Returns the score rounded to two decimals, or ``None`` when *text*
    contains no words.
    """
    words = _tokenize_words_es(text)
    if not words:
        return None
    n_words = len(words)
    n_sentences = _count_sentences_es(text)  # never below 1, safe divisor
    n_syllables = sum(count_syllables_es(word) for word in words)

    syllables_per_100_words = (n_syllables / n_words) * 100.0
    words_per_sentence = n_words / n_sentences
    score = 206.84 - 0.60 * syllables_per_100_words - 1.02 * words_per_sentence
    return round(score, 2)
|
|
| |
| |
| |
| |
| |
| |
| |
| |