import re
import unicodedata
| |
|
| | |
| | WORD_RE = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE) |
| |
|
| | def _tokenize_words_es(text: str): |
| | return WORD_RE.findall(text) |
| |
|
| | def _count_sentences_es(text: str) -> int: |
| | |
| | sentences = re.split(r"[.!?…]+|[¡¿]", text) |
| | return max(1, sum(1 for s in sentences if s.strip())) |
| |
|
| | |
# Maximal runs of lower-case Spanish vowels, used by the rule-based counter.
_VOWEL_RUN_RE = re.compile(r"[aeiouáéíóúü]+")
# Vowels that always carry their own syllable nucleus: a/e/o and the
# accented í/ú (the written accent breaks a would-be diphthong).
_STRONG_VOWELS = frozenset("aáeéoóíú")
# Spellings whose "u" is silent, paired with a u-less stand-in so the silent
# letter is not mistaken for a vowel. "güe"/"güi" are untouched: there the
# "ü" is pronounced.
_SILENT_U_SPELLINGS = (("que", "qe"), ("qui", "qi"), ("gue", "ge"), ("gui", "gi"))


def _count_syllables_rules_es(word: str) -> int:
    """Count the syllables of one Spanish word with orthographic rules.

    Each maximal run of vowels contributes one syllable per strong vowel it
    contains (two strong vowels never share a syllable), or a single
    syllable when the run is made only of weak vowels (i, u, ü). Counting
    strong vowels rather than adjacent strong pairs keeps strong-weak-strong
    runs such as the "oia" of "paranoia" at two syllables.

    Returns at least 1, including for empty or vowel-less input.
    """
    # A word-final "y" sounds like the vowel "i" ("hoy", "rey", "muy").
    w = re.sub(r"y$", "i", word.lower())
    for spelling, stand_in in _SILENT_U_SPELLINGS:
        w = w.replace(spelling, stand_in)

    total = 0
    for run in _VOWEL_RUN_RE.findall(w):
        strong_count = sum(ch in _STRONG_VOWELS for ch in run)
        total += max(1, strong_count)
    return max(1, total)


# Prefer pyphen's Spanish dictionary when it is usable; otherwise fall back
# to the rule-based counter above.
try:
    import pyphen
    _dic = pyphen.Pyphen(lang='es')
except Exception:
    # Deliberately broad (best effort): pyphen may be missing or may fail to
    # provide a Spanish dictionary. Either way the module must stay usable.
    def count_syllables_es(word: str) -> int:
        """Return the number of syllables in *word* (rule-based, at least 1)."""
        return _count_syllables_rules_es(word)
else:
    def count_syllables_es(word: str) -> int:
        """Return the number of syllables in *word* (pyphen-based, at least 1).

        The count is the number of hyphenation points pyphen inserts, plus
        one.
        """
        # NOTE(review): pyphen reports hyphenation points, which are not
        # guaranteed to coincide with every syllable boundary, so counts may
        # differ from the rule-based fallback -- confirm this is acceptable.
        hyph = _dic.inserted(word)
        return max(1, hyph.count('-') + 1)
| |
|
| | |
def fernandez_huerta(text: str) -> float | None:
    """
    Compute the Fernández–Huerta readability score of a Spanish text.

    The score is ``206.84 - 0.60 * P - 1.02 * F``, where ``P`` is the number
    of syllables per 100 words and ``F`` is the average number of words per
    sentence. Higher means easier; typical values fall roughly in 0–100.

    Returns the score rounded to two decimals, or ``None`` when *text*
    contains no words.
    """
    tokens = _tokenize_words_es(text)
    if not tokens:
        return None

    word_total = len(tokens)
    sentence_total = _count_sentences_es(text)
    syllable_total = sum(map(count_syllables_es, tokens))

    syllables_per_100_words = (syllable_total / word_total) * 100.0
    words_per_sentence = word_total / sentence_total
    score = 206.84 - 0.60 * syllables_per_100_words - 1.02 * words_per_sentence
    return round(score, 2)
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |