| import re |
# Optional dependency: pyphen supplies hyphenation dictionaries that
# count_syllables_pt() prefers over its regex fallback.  Both names are always
# bound; each is either a pyphen.Pyphen instance or None.
_hyph_pt_br = None
_hyph_pt_pt = None
try:
    import pyphen
except Exception:
    # Deliberately broad and best-effort: any import problem simply leaves
    # both hyphenators as None so callers use the regex fallback.
    pass
else:
    # Load each dictionary on its own so that a missing/broken pt_PT
    # dictionary does not discard a pt_BR hyphenator that loaded fine
    # (and vice versa).
    try:
        _hyph_pt_br = pyphen.Pyphen(lang='pt_BR')
    except Exception:
        _hyph_pt_br = None
    try:
        _hyph_pt_pt = pyphen.Pyphen(lang='pt_PT')
    except Exception:
        _hyph_pt_pt = None
|
|
| |
# A "word" is a maximal run of ASCII letters and Latin-1 accented letters.
# Digits, hyphens and punctuation are not matched, so they act as separators.
WORD_RE_PT = re.compile(r"[A-Za-zÀ-ÖØ-öø-ÿ]+", re.UNICODE)


def tokenize_words_pt(text: str):
    """Return every word found in *text*, in order of appearance.

    Words are the non-overlapping matches of ``WORD_RE_PT``; the result is an
    empty list when *text* contains no letters.
    """
    return [match.group(0) for match in WORD_RE_PT.finditer(text)]
|
|
def count_sentences_pt(text: str):
    """Count the sentences in *text*, never returning less than 1.

    The text is split on runs of ``.``, ``!``, ``?`` and ``…``; every piece
    that still has non-whitespace content counts as one sentence.  Text with
    no such piece (empty, blank, or punctuation only) is reported as 1 so the
    result is always safe to divide by.
    """
    total = 0
    for chunk in re.split(r"[.!?…]+", text):
        if chunk.strip():
            total += 1
    return total if total > 1 else 1
|
|
def count_syllables_pt(word: str) -> int:
    """Estimate the number of syllables in *word* (always at least 1).

    When a pyphen hyphenator was loaded at import time (pt_BR preferred,
    pt_PT otherwise), the word is hyphenated and the resulting pieces are
    counted.  Without pyphen, each run of consecutive vowels (including the
    accented vowels listed in the pattern, and ``y``) counts as one syllable.
    """
    hyphenator = _hyph_pt_br or _hyph_pt_pt
    if hyphenator:
        # inserted() marks break points with '-'; pieces = breaks + 1.
        hyphenated = hyphenator.inserted(word)
        return max(1, len(hyphenated.split('-')))

    # Fallback heuristic: one syllable per vowel group.
    vowel_runs = re.findall(r"[aeiouyAEIOUYàáâãéêíóôõúüÀÁÂÃÉÊÍÓÔÕÚÜ]+", word)
    return max(1, len(vowel_runs))
|
|
| |
def flesch_portuguese(text: str):
    """Compute a Flesch-style reading-ease score for Portuguese *text*.

    The score is ``248.835 - 1.015 * (words / sentences)
    - 84.6 * (syllables / words)``, rounded to two decimals.  Words,
    sentences and syllables come from ``tokenize_words_pt``,
    ``count_sentences_pt`` and ``count_syllables_pt``.

    Returns ``None`` when *text* contains no words.
    """
    tokens = tokenize_words_pt(text)
    if not tokens:
        return None

    word_total = len(tokens)
    sentence_total = count_sentences_pt(text)

    syllable_total = 0
    for token in tokens:
        syllable_total += count_syllables_pt(token)

    words_per_sentence = word_total / sentence_total
    syllables_per_word = syllable_total / word_total
    return round(248.835 - 1.015 * words_per_sentence - 84.6 * syllables_per_word, 2)
|
|
| |
def lix(text: str):
    """Compute the LIX readability index of *text*.

    LIX is the average number of words per sentence plus the percentage of
    long words, where a long word has more than 6 characters.  The result is
    rounded to two decimals.

    Returns ``None`` when *text* contains no words.
    """
    tokens = tokenize_words_pt(text)
    if not tokens:
        return None

    word_total = len(tokens)
    sentence_total = count_sentences_pt(text)
    long_total = len([token for token in tokens if len(token) > 6])

    avg_sentence_length = word_total / sentence_total
    long_word_pct = 100.0 * long_total / word_total
    return round(avg_sentence_length + long_word_pct, 2)
|
|
def rix(text: str):
    """Compute the RIX readability index of *text*.

    RIX is the number of long words (more than 6 characters) divided by the
    number of sentences, rounded to two decimals.

    Returns ``None`` when *text* contains no words.
    """
    tokens = tokenize_words_pt(text)
    if not tokens:
        return None

    sentence_total = count_sentences_pt(text)
    long_total = len([token for token in tokens if len(token) > 6])
    return round(long_total / sentence_total, 2)
|
|
| |
# Target score ranges per band, stored as (low, high) pairs in the shape that
# in_band() unpacks.  Adjacent bands share their boundary value.
# NOTE(review): the meaning of the B1/B2/B3 labels is not defined in this
# file -- confirm against the caller/spec.

# Ranges for the flesch_portuguese() score (higher score = band B1).
FRE_PT_BANDS = dict(
    B1=(70, 100),
    B2=(60, 70),
    B3=(45, 60),
)

# Ranges for the lix() score (lower score = band B1).
LIX_BANDS = dict(
    B1=(20, 35),
    B2=(35, 45),
    B3=(45, 60),
)
|
|
def in_band(score, band, bands, delta=0.0):
    """Tell whether *score* lies inside the range registered for *band*.

    Args:
        score: Numeric score to check, or ``None``.
        band: Key to look up in *bands*.
        bands: Mapping from band key to a ``(low, high)`` pair.
        delta: Tolerance that widens the range on both ends.

    Returns:
        ``False`` when *score* is ``None``; otherwise whether
        ``low - delta <= score <= high + delta`` (both ends inclusive).

    Raises:
        KeyError: If *band* is not a key of *bands*.
    """
    if score is None:
        return False

    lower, upper = bands[band]
    floor = lower - delta
    ceiling = upper + delta
    return floor <= score and score <= ceiling
|
|
| |
if __name__ == "__main__":
    # Smoke demo: score one short Portuguese sample and report whether the
    # Flesch and LIX scores fall in band B1 (with a small tolerance).
    txt = "O paciente está bem. Os exames não mostram sinais de infecção. Recomenda-se apenas acompanhamento."

    fre, lx, rx = flesch_portuguese(txt), lix(txt), rix(txt)
    fre_is_b1 = in_band(fre, 'B1', FRE_PT_BANDS, delta=1.0)
    lx_is_b1 = in_band(lx, 'B1', LIX_BANDS, delta=2.0)

    print("FRE-PT:", fre, "B1?", fre_is_b1)
    print("LIX:", lx, "B1?", lx_is_b1)
    print("RIX:", rx)