| | import re |
| | import pyphen |
| |
|
| | |
# Shared Pyphen hyphenation dictionary for the 'es_ES' locale. It is built
# once at import time and reused by the syllable counter.
_dic = pyphen.Pyphen(lang='es_ES')
| |
|
| | _word_re = re.compile(r"[A-Za-zÁÉÍÓÚÜÑáéíóúüñ]+", re.UNICODE) |
| |
|
def _tokenize_words(text):
    """Return every word found in *text*, in order of appearance.

    A word is whatever the module-level ``_word_re`` pattern matches; the
    result is a list of the matched substrings (empty when nothing matches).
    """
    return [match.group(0) for match in _word_re.finditer(text)]
| |
|
| | def _count_sentences(text): |
| | |
| | parts = re.split(r"[.!?¡¿]+", text) |
| | return max(1, sum(1 for p in parts if p.strip())) |
| |
|
def _count_syllables_es(word):
    """Estimate the number of syllables in a Spanish *word*.

    The estimate is based on the hyphenation break points reported by the
    module-level Pyphen dictionary ``_dic``. ``Pyphen`` has no ``hyphenate``
    method; ``positions`` is the call that returns the list of indices at
    which the word may be split.

    Each break point separates two chunks, so the result is the number of
    break points plus one. A word with no break points counts as a single
    syllable.
    """
    break_points = _dic.positions(word)
    return len(break_points) + 1
| |
|
def _text_stats_es(text):
    """Collect the raw counts the readability formulas are built from.

    Returns:
        A 4-tuple ``(words, sentences, syllables, long_words)`` where
        ``long_words`` is the number of words longer than six characters.
        With no words in *text*, the syllable and long-word counts are 0.
    """
    tokens = _tokenize_words(text)
    word_count = len(tokens)
    sentence_count = _count_sentences(text)

    syllable_count = 0
    long_word_count = 0
    for token in tokens:
        syllable_count += _count_syllables_es(token)
        if len(token) > 6:
            long_word_count += 1

    return word_count, sentence_count, syllable_count, long_word_count
| |
|
| | |
def szigriszt_pazos(text):
    """Return the Szigriszt-Pazos readability score of *text*.

    The score is
    ``206.835 - 62.3 * (syllables / words) - (words / sentences)``.

    Returns:
        The score as a float, or ``None`` when the text has no words or no
        sentences (the ratios would be undefined).
    """
    word_count, sentence_count, syllable_count, _ = _text_stats_es(text)
    if not word_count or not sentence_count:
        return None

    syllables_per_word = syllable_count / word_count
    words_per_sentence = word_count / sentence_count
    return 206.835 - 62.3 * syllables_per_word - words_per_sentence
| |
|
| | |
def lix(text):
    """Return the LIX readability score of *text*.

    The score is the average sentence length in words plus the percentage
    of long words (words longer than six characters).

    Returns:
        The score as a float, or ``None`` when the text has no words or no
        sentences (the ratios would be undefined).
    """
    word_count, sentence_count, _, long_word_count = _text_stats_es(text)
    if not word_count or not sentence_count:
        return None

    words_per_sentence = word_count / sentence_count
    long_word_percentage = 100.0 * long_word_count / word_count
    return words_per_sentence + long_word_percentage
| |
|
| | |
# (low, high) score range per band label, in the shape in_band() expects.
# NOTE(review): presumably these are target ranges for szigriszt_pazos()
# scores -- confirm. Adjacent bands share their boundary value.
SZ_BANDS = dict(
    B1=(65, 100),
    B2=(55, 65),
    B3=(40, 55),
)
| |
|
# (low, high) score range per band label, in the shape in_band() expects.
# NOTE(review): presumably these are target ranges for lix() scores --
# confirm. Adjacent bands share their boundary value.
LIX_BANDS = dict(
    B1=(20, 35),
    B2=(35, 45),
    B3=(45, 60),
)
| |
|
def in_band(score, band, bands, delta=0.0):
    """Tell whether *score* lies inside the range registered for *band*.

    Args:
        score: The value to test; ``None`` is never inside any band.
        band: Key selecting the range in *bands*.
        bands: Mapping from band key to a ``(low, high)`` pair.
        delta: Tolerance that widens the range on both sides.

    Returns:
        ``True`` when ``low - delta <= score <= high + delta`` (both ends
        inclusive), otherwise ``False``.

    Raises:
        KeyError: If *band* is not a key of *bands* (only reached when
            *score* is not ``None``).
    """
    if score is None:
        return False

    lower, upper = bands[band]
    if not (lower - delta) <= score:
        return False
    return score <= (upper + delta)
| |
|
| | |
# Demo: score a short two-sentence Spanish sample with both metrics.
text = "Las vacunas salvan millones de vidas cada año. Son seguras y eficaces."
sz, lx = szigriszt_pazos(text), lix(text)
| | |
| | |