| import re |
| import pyphen |
|
|
| |
# Spanish (es_ES) Pyphen hyphenation dictionary, built once at import time
# and shared by the syllable counter.
_dic = pyphen.Pyphen(lang="es_ES")
|
|
| _word_re = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE) |
|
|
| def _tokenize_words(text): |
| return _word_re.findall(text) |
|
|
| def _count_sentences(text): |
| |
| parts = re.split(r"[.!?¡¿]+", text) |
| return max(1, sum(1 for p in parts if p.strip())) |
|
|
def _count_syllables_es(word):
    """Estimate the number of syllables in a Spanish *word*.

    The estimate is the number of hyphenation points the module-level Pyphen
    dictionary reports for the word, plus one; a word with no hyphenation
    points counts as a single syllable.
    """
    # Pyphen's public API is positions()/iterate()/wrap()/inserted(); it has
    # no hyphenate() method.  positions() returns the list of break indices,
    # which is what the "+ 1" arithmetic here is built around.
    # NOTE(review): Pyphen's default left/right margins of 2 suppress breaks
    # near the word edges, so short words may be undercounted - confirm this
    # is acceptable for the readability scores.
    break_points = _dic.positions(word)
    return len(break_points) + 1
|
|
def _text_stats_es(text):
    """Gather the raw counts used by the readability formulas.

    Returns a 4-tuple ``(words, sentences, syllables, long_words)``, where a
    long word is one with more than six letters.
    """
    tokens = _tokenize_words(text)
    word_count = len(tokens)
    sentence_count = _count_sentences(text)

    # One pass over the tokens accumulates both per-word tallies; with no
    # tokens both simply stay at zero.
    syllable_count = 0
    long_word_count = 0
    for token in tokens:
        syllable_count += _count_syllables_es(token)
        if len(token) > 6:
            long_word_count += 1

    return word_count, sentence_count, syllable_count, long_word_count
|
|
| |
def szigriszt_pazos(text):
    """Compute the Szigriszt-Pazos readability score of *text*.

    The score is ``206.835 - 62.3 * (syllables / words) - (words / sentences)``.
    Returns ``None`` when the text yields no words or no sentences.
    """
    n_words, n_sentences, n_syllables, _unused = _text_stats_es(text)
    if not (n_words and n_sentences):
        return None

    syllables_per_word = n_syllables / n_words
    words_per_sentence = n_words / n_sentences
    return 206.835 - 62.3 * syllables_per_word - words_per_sentence
|
|
| |
def lix(text):
    """Compute the LIX readability score of *text*.

    The score is the average sentence length in words plus the percentage of
    long words.  Returns ``None`` when the text yields no words or no
    sentences.
    """
    n_words, n_sentences, _unused, n_long_words = _text_stats_es(text)
    if not (n_words and n_sentences):
        return None

    words_per_sentence = n_words / n_sentences
    long_word_percentage = 100.0 * n_long_words / n_words
    return words_per_sentence + long_word_percentage
|
|
| |
# Target score ranges as (low, high) pairs, keyed by band label, for the
# Szigriszt-Pazos score.  Adjacent bands share their boundary value.
# NOTE(review): B1..B3 presumably run from easiest to hardest text - confirm.
SZ_BANDS = dict(
    B1=(65, 100),
    B2=(55, 65),
    B3=(40, 55),
)


# Target score ranges as (low, high) pairs, keyed by the same band labels,
# for the LIX score.
LIX_BANDS = dict(
    B1=(20, 35),
    B2=(35, 45),
    B3=(45, 60),
)
|
|
def in_band(score, band, bands, delta=0.0):
    """Tell whether *score* lies inside the range registered for *band*.

    *bands* maps band labels to ``(low, high)`` pairs.  Both ends are
    inclusive and are widened by *delta* on each side.  A ``None`` score is
    never in any band (and is rejected before *band* is looked up).
    """
    if score is not None:
        lower, upper = bands[band]
        floor = lower - delta
        ceiling = upper + delta
        return floor <= score and score <= ceiling
    return False
|
|
| |
# Sample run: score a short two-sentence Spanish text with both indices.
text = (
    "Las vacunas salvan millones de vidas cada año. "
    "Son seguras y eficaces."
)
sz = szigriszt_pazos(text)
lx = lix(text)
| |
| |